Changeset 11977 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies
- Timestamp: 02/11/15 03:01:59 (10 years ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs
--- GenericFunctionApproximationGrammarPolicy.cs (r11976)
+++ GenericFunctionApproximationGrammarPolicy.cs (r11977)
@@ -62,6 +62,6 @@
 
 
-/*
-const double beta = 1;
+
+const double beta = 20;
 var w = from idx in Enumerable.Range(0, maxIdx)
 let afterStateQ = activeAfterStates[idx]
@@ -71,7 +71,7 @@
 selectedStateIdx = actionIndexMap[bestAction];
 Debug.Assert(selectedStateIdx >= 0);
-*/
-
-
+
+
+/*
 if (random.NextDouble() < 0.2) {
 selectedStateIdx = actionIndexMap[random.Next(maxIdx)];
@@ -91,8 +91,5 @@
 selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]];
 }
-
-
-
-
+*/
 return true;
 }
@@ -132,5 +129,5 @@
 
 public double GetValue(string state) {
-return problem.GetFeatures(state).Average(feature => GetWeight(feature));
+return problem.GetFeatures(state).Sum(feature => GetWeight(feature));
 }
 
@@ -142,11 +139,11 @@
 private void UpdateWeights(string state, double reward) {
 double delta = reward - GetValue(state);
-delta /= problem.GetFeatures(state).Count();
-const double alpha = 0.001;
+// delta /= problem.GetFeatures(state).Count();
+//const double alpha = 0.01;
 foreach (var feature in problem.GetFeatures(state)) {
 featureTries[feature.Id] = GetFeatureTries(feature.Id) + 1;
 Debug.Assert(GetFeatureTries(feature.Id) >= 1);
-//double alpha = 1.0 / GetFeatureTries(feature.Id);
-//alpha = Math.Max(alpha, 0.01);
+double alpha = 1.0 / GetFeatureTries(feature.Id);
+alpha = Math.Max(alpha, 0.001);
 
 double w;
Note: See TracChangeset for help on using the changeset viewer.