Free cookie consent management tool by TermsFeed Policy Generator

Changeset 11974


Ignore:
Timestamp:
02/10/15 02:05:31 (10 years ago)
Author:
gkronber
Message:

#2283: eurocast experiments

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs

    r11832 r11974  
    5757        }
    5858
     59        // very unlikely to be the same (and we don't care)
    5960        if (theta > bestQ) {
    6061          bestQ = theta;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs

    r11806 r11974  
    2323        aIdx++;
    2424        var q = aInfo.SampleExpectedReward(random);
     25        // very unlikely to be equal and we don't care
    2526        if (q > bestQ) {
    2627          bestQ = q;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs

    r11832 r11974  
    5757        originalIdx++;
    5858      }
     59     
     60      const double beta = 20.0;
     61      var w = from q in activeAfterStates
     62              select Math.Exp(beta * q);
    5963
     64      var bestAction = Enumerable.Range(0, maxIdx).SampleProportional(random, w);
     65      selectedStateIdx = actionIndexMap[bestAction];
     66      Debug.Assert(selectedStateIdx >= 0);
     67     
     68      /*
    6069      if (random.NextDouble() < 0.2) {
    6170        selectedStateIdx = actionIndexMap[random.Next(maxIdx)];
     
    6372        // find max
    6473        var bestQ = double.NegativeInfinity;
    65         var bestIdx = -1;
     74        var bestIdxs = new List<int>();
    6675        for (int i = 0; i < maxIdx; i++) {
    6776          if (activeAfterStates[i] > bestQ) {
     77            bestIdxs.Clear();
     78            bestIdxs.Add(i);
    6879            bestQ = activeAfterStates[i];
    69             bestIdx = i;
     80          } else if (activeAfterStates[i].IsAlmost(bestQ)) {
     81            bestIdxs.Add(i);
    7082          }
    7183        }
    72         selectedStateIdx = actionIndexMap[bestIdx];
     84        selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]];
    7385      }
     86      */
     87
     88
    7489
    7590      return true;
     
    103118
    104119    public double GetValue(string state) {
    105       return problem.GetFeatures(state).Sum(feature => GetWeight(feature)) ;
     120      return problem.GetFeatures(state).Sum(feature => GetWeight(feature));
    106121    }
    107122
     
    109124      double w;
    110125      if (featureWeigths.TryGetValue(feature.Id, out w)) return w * feature.Value;
    111       else return 0.0;
     126      else return 0.0; // TODO: alternatives?
    112127    }
    113128    private void UpdateWeights(string state, double reward) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

    r11846 r11974  
    77namespace HeuristicLab.Common {
    88  public static class Extensions {
    9    
     9
    1010    public static bool IsAlmost(this double x, double y) {
     11      if (double.IsNaN(x) || double.IsNaN(y)) return false;
     12      if (double.IsPositiveInfinity(x) && double.IsPositiveInfinity(y)) return true;
     13      if (double.IsNegativeInfinity(x) && double.IsNegativeInfinity(y)) return true;
    1114      return Math.Abs(x - y) < 1.0e-12;
    1215    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs

    r11857 r11974  
    126126
    127127    public IEnumerable<Feature> GetFeatures(string phrase) {
    128       return Enumerable.Repeat(new Feature(CanonicalRepresentation(phrase), 1.0), 1);
     128      yield return new Feature(CanonicalRepresentation(phrase), 1.0);
     129      // yield return new Feature("Length", phrase.Length); //
     130      // foreach (var pair in phrase.Zip(phrase.Skip(1), Tuple.Create)) {
     131      //   yield return new Feature(pair.Item1.ToString() + pair.Item2, 1.0);
     132      // }
     133      // number of occurrences for each symbol
    129134    }
    130135
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11973 r11974  
    3030      //RunGridTest();
    3131      //RunGpGridTest();
    32       RunFunApproxTest();
     32     RunFunApproxTest();
    3333    }
    3434
     
    3636      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
    3737      //var globalRandom = new Random(31415);
    38       var localRandSeed = 31415;
     38      var localRandSeed = new Random().Next();
    3939      var reps = 20;
    4040
    4141      var policyFactories = new Func<IBanditPolicy>[]
    4242        {
    43          () => new RandomPolicy(),
    44           () => new ActiveLearningPolicy(), 
    45          () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
    46          () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
    47          () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
    48          () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
    49          //() => new GaussianThompsonSamplingPolicy(),
    50          () => new GaussianThompsonSamplingPolicy(true),
    51          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
    52          () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
    53          //() => new BernoulliThompsonSamplingPolicy(),
    54          () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
    55          () => new EpsGreedyPolicy(0.01),
    56          () => new EpsGreedyPolicy(0.05),
    57          () => new EpsGreedyPolicy(0.1),
    58          () => new EpsGreedyPolicy(0.2),
    59          () => new EpsGreedyPolicy(0.5),
    60          () => new UCTPolicy(0.01),
    61          () => new UCTPolicy(0.05),
    62          () => new UCTPolicy(0.1),
    63          () => new UCTPolicy(0.5),
    64          () => new UCTPolicy(1),
    65          () => new UCTPolicy(2),
    66          () => new UCTPolicy( 5),
    67          () => new UCTPolicy( 10),
    68          () => new ModifiedUCTPolicy(0.01),
    69          () => new ModifiedUCTPolicy(0.05),
    70          () => new ModifiedUCTPolicy(0.1),
    71          () => new ModifiedUCTPolicy(0.5),
    72          () => new ModifiedUCTPolicy(1),
    73          () => new ModifiedUCTPolicy(2),
    74          () => new ModifiedUCTPolicy( 5),
    75          () => new ModifiedUCTPolicy( 10),
    76          () => new UCB1Policy(),
    77          () => new UCB1TunedPolicy(),
    78          () => new UCBNormalPolicy(),
    79          () => new BoltzmannExplorationPolicy(1),
    80          () => new BoltzmannExplorationPolicy(10),
    81          () => new BoltzmannExplorationPolicy(20),
    82          () => new BoltzmannExplorationPolicy(100),
    83          () => new BoltzmannExplorationPolicy(200),
    84          () => new BoltzmannExplorationPolicy(500),
     43         //() => new RandomPolicy(),
     44         // () => new ActiveLearningPolicy(), 
     45         //() => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
     46         //() => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
     47         //() => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
     48         //() => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
     49         ////() => new GaussianThompsonSamplingPolicy(),
     50         //() => new GaussianThompsonSamplingPolicy(true),
     51         //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
     52         //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
     53         ////() => new BernoulliThompsonSamplingPolicy(),
     54         //() => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
     55         //() => new EpsGreedyPolicy(0.01),
     56         //() => new EpsGreedyPolicy(0.05),
     57         //() => new EpsGreedyPolicy(0.1),
     58         //() => new EpsGreedyPolicy(0.2),
     59         //() => new EpsGreedyPolicy(0.5),
     60         //() => new UCTPolicy(0.01),
     61         //() => new UCTPolicy(0.05),
     62         //() => new UCTPolicy(0.1),
     63         //() => new UCTPolicy(0.5),
     64         //() => new UCTPolicy(1),
     65         //() => new UCTPolicy(2),
     66         //() => new UCTPolicy( 5),
     67         //() => new UCTPolicy( 10),
     68         //() => new ModifiedUCTPolicy(0.01),
     69         //() => new ModifiedUCTPolicy(0.05),
     70         //() => new ModifiedUCTPolicy(0.1),
     71         //() => new ModifiedUCTPolicy(0.5),
     72         //() => new ModifiedUCTPolicy(1),
     73         //() => new ModifiedUCTPolicy(2),
     74         //() => new ModifiedUCTPolicy( 5),
     75         //() => new ModifiedUCTPolicy( 10),
     76         //() => new UCB1Policy(),
     77         //() => new UCB1TunedPolicy(),
     78         //() => new UCBNormalPolicy(),
     79         //() => new BoltzmannExplorationPolicy(1),
     80         //() => new BoltzmannExplorationPolicy(10),
     81         //() => new BoltzmannExplorationPolicy(20),
     82         //() => new BoltzmannExplorationPolicy(100),
     83         //() => new BoltzmannExplorationPolicy(200),
     84         //() => new BoltzmannExplorationPolicy(500),
    8585         () => new ChernoffIntervalEstimationPolicy( 0.01),
    8686         () => new ChernoffIntervalEstimationPolicy( 0.05),
    8787         () => new ChernoffIntervalEstimationPolicy( 0.1),
    8888         () => new ChernoffIntervalEstimationPolicy( 0.2),
    89          () => new ThresholdAscentPolicy(5, 0.01),
    90          () => new ThresholdAscentPolicy(5, 0.05),
    91          () => new ThresholdAscentPolicy(5, 0.1),
    92          () => new ThresholdAscentPolicy(5, 0.2),
    93          () => new ThresholdAscentPolicy(10, 0.01),
    94          () => new ThresholdAscentPolicy(10, 0.05),
    95          () => new ThresholdAscentPolicy(10, 0.1),
    96          () => new ThresholdAscentPolicy(10, 0.2),
    97          () => new ThresholdAscentPolicy(50, 0.01),
    98          () => new ThresholdAscentPolicy(50, 0.05),
    99          () => new ThresholdAscentPolicy(50, 0.1),
    100          () => new ThresholdAscentPolicy(50, 0.2),
    101          () => new ThresholdAscentPolicy(100, 0.01),
    102          () => new ThresholdAscentPolicy(100, 0.05),
    103          () => new ThresholdAscentPolicy(100, 0.1),
    104          () => new ThresholdAscentPolicy(100, 0.2),
    105          () => new ThresholdAscentPolicy(500, 0.01),
    106          () => new ThresholdAscentPolicy(500, 0.05),
    107          () => new ThresholdAscentPolicy(500, 0.1),
    108          () => new ThresholdAscentPolicy(500, 0.2),
     89         //() => new ThresholdAscentPolicy(5, 0.01),
     90         //() => new ThresholdAscentPolicy(5, 0.05),
     91         //() => new ThresholdAscentPolicy(5, 0.1),
     92         //() => new ThresholdAscentPolicy(5, 0.2),
     93         //() => new ThresholdAscentPolicy(10, 0.01),
     94         //() => new ThresholdAscentPolicy(10, 0.05),
     95         //() => new ThresholdAscentPolicy(10, 0.1),
     96         //() => new ThresholdAscentPolicy(10, 0.2),
     97         //() => new ThresholdAscentPolicy(50, 0.01),
     98         //() => new ThresholdAscentPolicy(50, 0.05),
     99         //() => new ThresholdAscentPolicy(50, 0.1),
     100         //() => new ThresholdAscentPolicy(50, 0.2),
     101         //() => new ThresholdAscentPolicy(100, 0.01),
     102         //() => new ThresholdAscentPolicy(100, 0.05),
     103         //() => new ThresholdAscentPolicy(100, 0.1),
     104         //() => new ThresholdAscentPolicy(100, 0.2),
     105         //() => new ThresholdAscentPolicy(500, 0.01),
     106         //() => new ThresholdAscentPolicy(500, 0.05),
     107         //() => new ThresholdAscentPolicy(500, 0.1),
     108         //() => new ThresholdAscentPolicy(500, 0.2),
    109109         //() => new ThresholdAscentPolicy(5000, 0.01),
    110110         //() => new ThresholdAscentPolicy(10000, 0.01),
     
    128128              var localRand = new Random(localRandSeed);
    129129              var options = new ParallelOptions();
    130               options.MaxDegreeOfParallelism = 4;
     130              options.MaxDegreeOfParallelism = 1;
    131131              Parallel.For(0, reps, options, (i) => {
    132132                Random myLocalRand;
     
    314314      var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[]
    315315      {
    316         () => Tuple.Create(100000, 23,  (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()),
    317         //() => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()),
     316        //() => Tuple.Create(100000, 23,  (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()),
     317        () => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()),
    318318        //() => Tuple.Create(50000, 32,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()),
    319319        //() => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
Note: See TracChangeset for help on using the changeset viewer.