Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/27/17 18:42:04 (7 years ago)
Author:
gkronber
Message:

#2796 comments, simplifications, reviewed tests for structure enumeration (not working)

Location:
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg.csproj

    r15425 r15437  
    103103    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    104104    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
    105     <Compile Include="MctsSymbolicRegression\ConstraintHandler.cs" />
    106105    <Compile Include="MctsSymbolicRegression\Disassembler.cs" />
    107106    <Compile Include="MctsSymbolicRegression\ExpressionEvaluator.cs" />
     
    113112    <Compile Include="MctsSymbolicRegression\Policies\IPolicy.cs" />
    114113    <Compile Include="MctsSymbolicRegression\Policies\PolicyBase.cs" />
    115     <Compile Include="MctsSymbolicRegression\Policies\Ucb.cs" />
    116     <Compile Include="MctsSymbolicRegression\Policies\UcbTuned.cs" />
    117114    <Compile Include="MctsSymbolicRegression\ExprHash.cs" />
    118115    <Compile Include="MctsSymbolicRegression\EmptyConstraintHandler.cs" />
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/Heuristics.cs

    r15426 r15437  
    1313  //           - even if variables are colinear?
    1414  //           - even for non-linear transformations
    15 
     15  //
     16  // Also see Multi-variate adaptive regression splines (MARS)
     17  // Maybe we could use MARS-style basis functions to identify the relevant interaction terms. (tune split points and find optimal interaction term with max spearmans rank)
     18  //
    1619  // assuming we interactions of have scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo
    1720  // this leads to: x y + x yo + y xo + yo xo.
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs

    r15420 r15437  
    4343  //
    4444  internal class Automaton {
    45     // TODO: refactor so that State is an enumerable type
    4645
    4746    // there is a single final state (ExprEnd)
    4847    // states with lower values are closer to the final state
    4948    // (this is helpful when we try to navigate to the final state)
     49
     50    // we cannot use an enum type here because the set of states is dynamic (including states for variables)
    5051    public const int StateExprEnd = 1;
    5152    public const int StateTermEnd = 2;
     
    8182    public const int FirstDynamicState = 25;
    8283    // more states for individual variables are created dynamically
     84    public readonly List<string> stateNames = new List<string>() {
     85       string.Empty,
     86      "ExprEnd",
     87      "TermEnd",
     88      "FactorEnd",
     89      "VariableFactorEnd",
     90      "ExpFactorEnd",
     91      "LogFactorEnd",
     92      "InvFactorEnd",
     93      "ExpFEnd",
     94      "LogTEnd",
     95      "InvTEnd",
     96      "LogTFEnd",
     97      "InfTFEnd",
     98      "LogTFStart",
     99      "InvTFStart",
     100      "ExpFStart",
     101      "LogTStart",
     102      "InvTStart",
     103      "VariableFactorStart",
     104      "ExpFactorStart",
     105      "LogFactorStart",
     106      "InvFactorStart",
     107      "FactorStart",
     108      "TermStart",
     109      "Expr",
     110    };
     111
    83112
    84113    private const int StartState = StateExpr;
    85114    public int CurrentState { get; private set; }
    86115
    87     public readonly List<string> stateNames;
    88116    private List<int>[] followStates;
    89117    private List<Action>[,] actions; // not every follow state is possible but this representation should be efficient
     
    99127       bool allowMultipleTerms = false) {
    100128      int nVars = vars.Length;
    101       stateNames = new List<string>() { string.Empty, "Expr", "ExprEnd", "TermStart", "TermEnd", "FactorStart", "FactorEnd", "VarFactorStart", "VarFactorEnd", "ExpFactorStart", "ExpFactorEnd", "LogFactorStart", "LogFactorEnd", "InvFactorStart", "InvFactorEnd", "ExpFStart", "ExpFEnd", "LogTStart", "LogTEnd", "LogTFStart", "LogTFEnd", "InvTStart", "InvTEnd", "InvTFStart", "InvTFEnd" };
    102129      codeGenerator = new CodeGenerator();
    103130      this.constraintHandler = constraintHandler;
     
    369396    }
    370397
    371     private readonly int[] followStatesBuf = new int[1000];
    372     public void FollowStates(int state, out int[] buf, out int nElements) {
     398    public void FollowStates(int state, ref int[] buf, out int nElements) {
    373399      var fs = followStates[state];
    374400      int j = 0;
     
    376402        var s = fs[i];
    377403        if (constraintHandler.IsAllowedFollowState(state, s)) {
    378           followStatesBuf[j++] = s;
     404          buf[j++] = s;
    379405        }
    380406      }
    381       buf = followStatesBuf;
    382407      nElements = j;
    383408    }
     
    446471
    447472    internal string GetActionString(int fromState, int toState) {
    448       return actionStrings[fromState,toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
     473      return actionStrings[fromState, toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
    449474    }
    450475
     
    454479        writer.WriteLine("digraph {");
    455480        // writer.WriteLine("rankdir=LR");
    456         for (int s = StartState; s < stateNames.Count; s++) {
     481        for (int s = 1; s < stateNames.Count; s++) {
    457482          for (int i = 0; i < followStates[s].Count; i++) {
    458483            if (followStates[s][i] <= 0) continue;
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Disassembler.cs

    r15414 r15437  
    5858        switch (op) {
    5959          case (byte)OpCodes.Add: sb.Append(" + "); break;
    60           case (byte)OpCodes.Mul: sb.Append(""); break;
    61           case (byte)OpCodes.LoadConst1: break;
    62           case (byte)OpCodes.LoadConst0: break;
    63           case (byte)OpCodes.LoadParamN: break;
     60          case (byte)OpCodes.Mul: sb.Append(" * "); break;
     61          case (byte)OpCodes.LoadConst1: sb.Append(" 1 ");  break;
     62          case (byte)OpCodes.LoadConst0: sb.Append(" 0 "); break;
     63          case (byte)OpCodes.LoadParamN: sb.Append(" c "); break;
    6464          case (byte)OpCodes.LoadVar: {
    6565              short arg = (short)((code[pc] << 8) | code[pc + 1]);
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

    r15416 r15437  
    186186      //   "Balancing parameter in UCT formula (0 < c < 1000). Small values: greedy search. Large values: enumeration. Default: 1.0", new DoubleValue(1.0)));
    187187      Parameters.Add(new ValueParameter<IPolicy>(PolicyParameterName,
    188         "The policy to use for selecting nodes in MCTS (e.g. Ucb)", new Ucb()));
     188        "The policy to use for selecting nodes in MCTS", new EpsilonGreedy()));
    189189      PolicyParameter.Hidden = true;
    190190      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>(AllowedFactorsParameterName,
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs

    r15425 r15437  
    6363    // TODO: Solve Poly-10
    6464    // TODO: After state unification the recursive backpropagation of results takes a lot of time. How can this be improved?
    65     // TODO: Why is the algorithm so slow for rather greedy policies (e.g. low C value in UCB)?
    66     // TODO: check if we can use a quality measure with range [-1..1] in policies
     65    // ~~obsolete TODO: Why is the algorithm so slow for rather greedy policies (e.g. low C value in UCB)?
     66    // ~~obsolete TODO: check if we can use a quality measure with range [-1..1] in policies
    6767    // TODO: unit tests for benchmark problems which contain log / exp / x^-1 but without numeric constants
    6868    // TODO: check if transformation of y is correct and works (Obj 2)
     
    142142      private readonly double[] predBuf, testPredBuf;
    143143      private readonly double[][] gradBuf;
    144 
    145       // debugging stats
    146       // calculate for each level the number of alternatives the average 'inequality' of tries and 'inequality' of quality over the alternatives for each trie
    147       // inequality can be calculated using the Gini coefficient
    148       internal readonly double[] pathGiniCoeffs = new double[100];
    149       internal readonly double[] pathQs = new double[100];
    150       internal readonly double[] levelBestQ = new double[100];
    151       // internal readonly double[] levelMaxTries = new double[100];
    152       internal readonly double[] pathBestQ = new double[100]; // as long as pathBestQs = levelBestQs we are following the correct path
    153       internal readonly string[] levelBestAction = new string[100];
    154       internal readonly string[] curAction = new string[100];
    155       internal readonly double[] pathSelectedQ = new double[100];
    156144
    157145      public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables,
     
    200188
    201189        this.automaton = new Automaton(x, new SimpleConstraintHandler(maxVariables), allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
    202         this.treePolicy = treePolicy ?? new Ucb();
     190        this.treePolicy = treePolicy ?? new EpsilonGreedy();
    203191        this.tree = new Tree() {
    204192          state = automaton.CurrentState,
     
    455443      #endregion
    456444
    457 #if DEBUG
    458       internal void ClearStats() {
    459         for (int i = 0; i < pathGiniCoeffs.Length; i++) pathGiniCoeffs[i] = -1;
    460         for (int i = 0; i < pathQs.Length; i++) pathGiniCoeffs[i] = -99;
    461         for (int i = 0; i < pathBestQ.Length; i++) pathBestQ[i] = -99;
    462         for (int i = 0; i < pathSelectedQ.Length; i++) pathSelectedQ[i] = -99;
    463       }
    464       internal void WriteGiniStats() {
    465         Console.WriteLine(string.Join("\t", pathGiniCoeffs.TakeWhile(x => x >= 0).Select(x => string.Format("{0:N3}", x))));
    466       }
    467       internal void WriteQs() {
    468         // Console.WriteLine(string.Join("\t", pathQs.TakeWhile(x => x >= -100).Select(x => string.Format("{0:N3}", x))));
    469         var sb = new StringBuilder();
    470         // length
    471         int i = 0;
    472         while (i < pathBestQ.Length && pathBestQ[i] > -99 && pathBestQ[i] == levelBestQ[i]) {
    473           i++;
    474         }
    475         sb.AppendFormat("{0,-3}",i);
    476 
    477         i = 0;
    478         // sb.AppendFormat("{0:N3}", levelBestQ[0]);
    479         while (i < pathSelectedQ.Length && pathSelectedQ[i] > -99) {
    480           sb.AppendFormat("\t{0:N3}", pathSelectedQ[i]);
    481           i++;
    482         }
    483         Console.WriteLine(sb.ToString());
    484         sb.Clear();
    485         i = 0;
    486         // sb.AppendFormat("{0:N3}", levelBestQ[0]);
    487         while (i < pathBestQ.Length && pathBestQ[i] > -99) {
    488           sb.AppendFormat("\t{0:N3}", pathBestQ[i]);
    489           i++;
    490         }
    491         Console.WriteLine(sb.ToString());
    492         sb.Clear();
    493         i = 0;
    494         while (i < pathBestQ.Length && pathBestQ[i] > -99) {
    495           sb.AppendFormat("\t{0:N3}", levelBestQ[i]);
    496           i++;
    497         }
    498         Console.WriteLine(sb.ToString());
    499        
    500         sb.Clear();
    501         i = 0;
    502         while (i < pathBestQ.Length && pathBestQ[i] > -99) {
    503           sb.AppendFormat("\t{0,-5}", (curAction[i] != null && curAction[i].Length > 5) ? curAction[i].Substring(0, 5) : curAction[i]);
    504           i++;
    505         }
    506         Console.WriteLine(sb.ToString());
    507         sb.Clear();
    508         i = 0;
    509         while (i < pathBestQ.Length && pathBestQ[i] > -99) {
    510           sb.AppendFormat("\t{0,-5}", (levelBestAction[i] != null && levelBestAction[i].Length > 5) ? levelBestAction[i].Substring(0, 5) : levelBestAction[i]);
    511           i++;
    512         }
    513         Console.WriteLine(sb.ToString());
    514        
    515         Console.WriteLine();
    516       }
    517 
    518 
    519 #endif
    520445
    521446    }
     
    578503      bool success = false;
    579504      do {
    580 #if DEBUG
    581         mctsState.ClearStats();
    582 #endif
     505
    583506        automaton.Reset();
    584507        success = TryTreeSearchRec2(rand, tree, automaton, eval, treePolicy, mctsState, out q);
     
    588511
    589512#if DEBUG
    590       // mctsState.WriteGiniStats();
    591513      Console.WriteLine(ExprStr(automaton));
    592       mctsState.WriteQs();
    593       // Console.WriteLine(WriteStatistics(tree, mctsState));
    594 
    595514#endif
    596       //if (mctsState.effectiveRollouts % 100 == 1) {
    597       // Console.WriteLine(WriteTree(tree, mctsState));
    598       // Console.WriteLine(TraceTree(tree, mctsState));
    599       //}
    600515      return q;
    601516    }
     
    620535
    621536      while (!automaton.IsFinalState(automaton.CurrentState)) {
     537        Console.WriteLine(automaton.stateNames[automaton.CurrentState]);
    622538        if (state.children.ContainsKey(tree)) {
    623539          if (state.children[tree].All(ch => ch.Done)) {
     
    631547            selectedIdx = treePolicy.Select(state.children[tree].Select(ch => ch.actionStatistics), rand);
    632548          }
    633          
    634           // STATS
    635           state.pathGiniCoeffs[tree.level] = InequalityCoefficient(state.children[tree].Select(ch => (double)ch.actionStatistics.AverageQuality));
    636           state.pathQs[tree.level] = tree.actionStatistics.AverageQuality;
    637549
    638550          tree = state.children[tree][selectedIdx];
    639551
    640552          // move the automaton forward until reaching the state
    641           // all steps where no alternatives are possible are immediately taken
     553          // all steps where no alternatives could be taken immediately (without expanding the tree)
    642554          // TODO: simplification of the automaton
    643           int[] possibleFollowStates;
     555          int[] possibleFollowStates = new int[1000];
    644556          int nFs;
    645           automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
    646           // TODO!
    647           // while (possibleFollowStates[0] != tree.state && nFs == 1 &&
    648           //   !automaton.IsEvalState(possibleFollowStates[0]) && !automaton.IsFinalState(possibleFollowStates[0])) {
    649           //   automaton.Goto(possibleFollowStates[0]);
    650           //   automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
    651           // }
     557          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
    652558          Debug.Assert(possibleFollowStates.Contains(tree.state));
    653559          automaton.Goto(tree.state);
    654560        } else {
    655561          // EXPAND
    656           int[] possibleFollowStates;
     562          int[] possibleFollowStates = new int[1000];
    657563          int nFs;
    658564          string actionString = "";
    659           automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
    660           // TODO
    661           // while (nFs == 1 && !automaton.IsEvalState(possibleFollowStates[0]) && !automaton.IsFinalState(possibleFollowStates[0])) {
    662           //   actionString += " " + automaton.GetActionString(automaton.CurrentState, possibleFollowStates[0]);
    663           //   // no alternatives -> just go to the next state
    664           //   automaton.Goto(possibleFollowStates[0]);
    665           //   automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
    666           // }
     565          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
     566
    667567          if (nFs == 0) {
    668568            // stuck in a dead end (no final state and no allowed follow states)
     
    676576            // for selected states (EvalStates) we introduce state unification (detection of equivalent states)
    677577            if (automaton.IsEvalState(possibleFollowStates[i])) {
    678               var hc = Hashcode(automaton);
     578              var hc = Hashcode(automaton); // TODO fix unit test for structure enumeration
    679579              if (!state.nodes.TryGetValue(hc, out child)) {
    680580                child = new Tree() {
     
    688588              }
    689589              // only allow forward edges (don't add the child if we would go back in the graph)
    690               else if (child.level > tree.level)  {
     590              else if (child.level > tree.level) {
    691591                // whenever we join paths we need to propagate back the statistics of the existing node through the newly created link
    692592                // to all parents
     
    696596                Debug.Assert(child.level <= tree.level);
    697597                child = null;
    698               }   
     598              }
    699599            } else {
    700600              child = new Tree() {
     
    740640        q = eval(code, nParams);
    741641        // Console.WriteLine("{0:N4}\t{1}", q*q, tree.expr);
    742         q = TransformQuality(q);
    743642        success = true;
    744         BackpropagateQuality(tree, q, treePolicy, state);       
     643        BackpropagateQuality(tree, q, treePolicy, state);
    745644      } else {
    746645        // we got stuck in roll-out (not evaluation necessary!)
     
    758657
    759658      return success;
    760     }
    761 
    762     private static double InequalityCoefficient(IEnumerable<double> xs) {
    763       var arr = xs.ToArray();
    764       var sad = 0.0;
    765       var sum = 0.0;
    766 
    767       for(int i=0;i<arr.Length;i++) {
    768         for(int j=0;j<arr.Length;j++) {
    769           sad += Math.Abs(arr[i] - arr[j]);
    770           sum += arr[j];
    771         }
    772       }
    773       return 0.5 * sad / sum;     
    774     }
    775 
    776     private static double TransformQuality(double q) {
    777       // no transformation
    778       return q;
    779 
    780       // EXPERIMENTAL!
    781 
    782       // Fisher transformation
    783       // (assumes q is Correl(pred, target)
    784 
    785       q = Math.Min(q,  0.99999999);
    786       q = Math.Max(q, -0.99999999);
    787       return 0.5 * Math.Log((1 + q) / (1 - q));
    788 
    789       // optimal result: q = 1 -> return huge value
    790       // if (q >= 1.0) return 1E16;
    791       // // return number of 9s in R²
    792       // return -Math.Log10(1 - q);
    793659    }
    794660
     
    840706      }
    841707
    842       state.pathSelectedQ[tree.level] = tree.actionStatistics.AverageQuality;
    843       state.pathBestQ[tree.level] = tree.actionStatistics.BestQuality;
    844       state.curAction[tree.level] = tree.expr;
    845       if (state.levelBestQ[tree.level] < tree.actionStatistics.BestQuality) {
    846         state.levelBestQ[tree.level] = tree.actionStatistics.BestQuality;
    847         state.levelBestAction[tree.level] = tree.expr;
    848       }
    849708    }
    850709
     
    855714      Tree minChild = children.First();
    856715      for (int i = 1; i < children.Count; i++) {
    857         if(children[i].state < minChild.state)
     716        if (children[i].state < minChild.state)
    858717          selectedChildIdx = i;
    859718      }
     
    883742        } else {
    884743          // EXPAND
    885           int[] possibleFollowStates;
     744          int[] possibleFollowStates = new int[1000];
    886745          int nFs;
    887           automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
     746          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
    888747          if (nFs == 0) {
    889748            // stuck in a dead end (no final state and no allowed follow states)
     
    1062921        }
    1063922
    1064         foreach(var tup in list) {
     923        foreach (var tup in list) {
    1065924          var ch = tup.Item3;
    1066925          var chId = tup.Item2;
    1067           if(state.children.ContainsKey(ch) && state.children[ch].Count == 1) {
     926          if (state.children.ContainsKey(ch) && state.children[ch].Count == 1) {
    1068927            var chch = state.children[ch].First();
    1069928            nextId++;
Note: See TracChangeset for help on using the changeset viewer.