Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/11/15 02:22:18 (10 years ago)
Author:
gkronber
Message:

#2283: worked on sequential search for the ant problem

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs

    r11974 r11976  
    1414  public sealed class GenericFunctionApproximationGrammarPolicy : IGrammarPolicy {
    1515    private Dictionary<string, double> featureWeigths; // stores the necessary information for bandit policies for each state (=canonical phrase)
     16    private Dictionary<string, int> featureTries;
    1617    private HashSet<string> done;
    1718    private readonly bool useCanonicalPhrases;
    1819    private readonly IProblem problem;
     20
    1921
    2022
     
    2325      this.problem = problem;
    2426      this.featureWeigths = new Dictionary<string, double>();
     27      this.featureTries = new Dictionary<string, int>();
    2528      this.done = new HashSet<string>();
    2629    }
     
    5760        originalIdx++;
    5861      }
    59      
    60       const double beta = 20.0;
    61       var w = from q in activeAfterStates
    62               select Math.Exp(beta * q);
     62
     63
     64      /*
     65      const double beta = 1;
     66      var w = from idx in Enumerable.Range(0, maxIdx)
     67              let afterStateQ = activeAfterStates[idx]
     68              select Math.Exp(beta * afterStateQ);
    6369
    6470      var bestAction = Enumerable.Range(0, maxIdx).SampleProportional(random, w);
    6571      selectedStateIdx = actionIndexMap[bestAction];
    6672      Debug.Assert(selectedStateIdx >= 0);
    67      
    68       /*
     73      */
     74
     75
    6976      if (random.NextDouble() < 0.2) {
    7077        selectedStateIdx = actionIndexMap[random.Next(maxIdx)];
     
    8491        selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]];
    8592      }
    86       */
     93
    8794
    8895
     
    114121
    115122    public int GetTries(string state) {
    116       return 1;
     123      return 0;
     124    }
     125
     126    public int GetFeatureTries(string featureId) {
     127      int t;
     128      if (featureTries.TryGetValue(featureId, out t)) {
     129        return t;
     130      } else return 0;
    117131    }
    118132
    119133    public double GetValue(string state) {
    120       return problem.GetFeatures(state).Sum(feature => GetWeight(feature));
     134      return problem.GetFeatures(state).Average(feature => GetWeight(feature));
    121135    }
    122136
     
    124138      double w;
    125139      if (featureWeigths.TryGetValue(feature.Id, out w)) return w * feature.Value;
    126       else return 0.0; // TODO: alternatives?
     140      else return 0.0;
    127141    }
    128142    private void UpdateWeights(string state, double reward) {
    129       const double alpha = 0.01;
    130143      double delta = reward - GetValue(state);
     144      delta /= problem.GetFeatures(state).Count();
     145      const double alpha = 0.001;
    131146      foreach (var feature in problem.GetFeatures(state)) {
     147        featureTries[feature.Id] = GetFeatureTries(feature.Id) + 1;
     148        Debug.Assert(GetFeatureTries(feature.Id) >= 1);
     149        //double alpha = 1.0 / GetFeatureTries(feature.Id);
     150        //alpha = Math.Max(alpha, 0.01);
     151
    132152        double w;
    133153        if (!featureWeigths.TryGetValue(feature.Id, out w)) {
    134           featureWeigths[feature.Id] = alpha * delta;
     154          featureWeigths[feature.Id] = alpha * delta * feature.Value;
    135155        } else {
    136           featureWeigths[feature.Id] += alpha * delta;
     156          featureWeigths[feature.Id] += alpha * delta * feature.Value;
    137157        }
    138158      }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs

    r11850 r11976  
    166166    private void DistributeReward(double reward) {
    167167      behaviourPolicy.UpdateReward(stateChain, reward);
    168       greedyPolicy.UpdateReward(stateChain, reward);
     168      //greedyPolicy.UpdateReward(stateChain, reward);
    169169    }
    170170
Note: See TracChangeset for help on using the changeset viewer.