Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/21/18 18:17:18 (5 years ago)
Author:
gkronber
Message:

#2929: worked on display of results, converting PGE expressions to HL Solutions.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/PGE.cs

    r16231 r16315  
    33using System.Linq;
    44using System.Runtime.InteropServices;
    5 using System.Text;
    65using System.Threading;
     6using HeuristicLab.Analysis;
    77using HeuristicLab.Common;
    88using HeuristicLab.Core;
    99using HeuristicLab.Data;
    10 using HeuristicLab.Optimization;
     10using System.Text.RegularExpressions;
     11using HeuristicLab.Optimization;
    1112using HeuristicLab.Parameters;
    1213using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    13 using HeuristicLab.Problems.DataAnalysis;     
     14using HeuristicLab.Problems.DataAnalysis;
     15using HeuristicLab.Problems.DataAnalysis.Symbolic;
     16using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    1417
    1518namespace PGE {
     
    1821  [Creatable(Category = CreatableAttribute.Categories.Algorithms, Priority = 999)]
    1922
    20   [StorableClass] 
     23  [StorableClass]
    2124  public unsafe class PGE : BasicAlgorithm {
    2225
     
    4952
    5053    #region parameter names
    51     private static readonly string MaxIterationsParameterName = "MaxIterations";                     
     54    private static readonly string MaxIterationsParameterName = "MaxIterations";
    5255    private static readonly string MaxGenParameterName = "MaxGen";
    5356    private static readonly string EvalrCountParameterName = "EvalrCount";
     
    5558    private static readonly string MinSizeParameterName = "MinSize";
    5659    private static readonly string MaxDepthParameterName = "MaxDepth";
    57     private static readonly string SearchVarParameterName = "SearchVar";
    5860    private static readonly string MinDepthParameterName = "MinDepth";
    5961    private static readonly string PgeRptEpochParameterName = "PgeRptEpoch";
     
    6264    private static readonly string PeelCntParameterName = "PeelCnt";
    6365    private static readonly string ZeroEpsilonParameterName = "ZeroEpsilon";
    64     private static readonly string HitRatioParameterName = "HitRatio";               
     66    private static readonly string HitRatioParameterName = "HitRatio";
    6567    private static readonly string InitMethodParameterName = "InitMethod";
    6668    private static readonly string GrowMethodParameterName = "GrowMethod";
     
    121123    }
    122124
    123     private IFixedValueParameter<IntValue> SearchVarParameter {
    124       get { return (IFixedValueParameter<IntValue>)Parameters[SearchVarParameterName]; }
    125     }
    126     public int SearchVar {
    127       get { return SearchVarParameter.Value.Value; }
    128       set { SearchVarParameter.Value.Value = value; }
    129     }
    130 
    131125    private IFixedValueParameter<IntValue> MinDepthParameter {
    132126      get { return (IFixedValueParameter<IntValue>)Parameters[MinDepthParameterName]; }
     
    240234      // algorithm parameters are shown in the GUI
    241235      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, new IntValue(50)));
    242       Parameters.Add(new FixedValueParameter<IntValue>(SearchVarParameterName, new IntValue(0)));
    243236      Parameters.Add(new FixedValueParameter<IntValue>(MinDepthParameterName, new IntValue(1)));
    244237      Parameters.Add(new FixedValueParameter<IntValue>(MaxDepthParameterName, new IntValue(6)));
     
    250243      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptCountParameterName, new IntValue(20)));
    251244      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptEpochParameterName, new IntValue(1)));
    252       Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));               
    253                                                                                                                                      
     245      Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));
     246
    254247      Parameters.Add(new FixedValueParameter<StringValue>(InitMethodParameterName, new StringValue("method1")));  // TODO Dropdown
    255248      Parameters.Add(new FixedValueParameter<StringValue>(GrowMethodParameterName, new StringValue("method1")));
     
    264257    }
    265258
    266  
     259
    267260    [StorableConstructor]
    268261    public PGE(bool deserializing) : base(deserializing) { }
    269262
    270    
    271     public PGE(PGE original, Cloner cloner) : base(original, cloner) {   
     263
     264    public PGE(PGE original, Cloner cloner) : base(original, cloner) {
    272265      // nothing to clone
    273266    }
     
    278271
    279272    protected override void Run(CancellationToken cancellationToken) {
     273      Log log = new Log();
     274      Results.Add(new Result("Log", log));
     275      var iterationsResult = new IntValue(0);
     276      Results.Add(new Result("Iteration", iterationsResult));
     277      var bestTestScoreResult = new IntValue(0); // TODO: why is test score an int?
     278      Results.Add(new Result("Best test score", bestTestScoreResult));
     279      var testScoresTable = new DataTable("Test scores");
     280      var bestTestScoreRow = new DataRow("Best test score");
     281      var curTestScoreRow = new DataRow("Current test score");
     282      testScoresTable.Rows.Add(bestTestScoreRow);
     283      testScoresTable.Rows.Add(curTestScoreRow);
     284      Results.Add(new Result("Test scores", testScoresTable));
     285      var lengthsTable = new DataTable("Lengths");
     286      var len1Row = new DataRow("Length 1");
     287      var len2Row = new DataRow("Length 2");
     288      lengthsTable.Rows.Add(len1Row);
     289      lengthsTable.Rows.Add(len2Row);
     290      Results.Add(new Result("Lengths", lengthsTable));
     291
     292      var bestSolutionResult = new Result("Best solution", typeof(IRegressionSolution));
     293      Results.Add(bestSolutionResult);
     294
    280295      // TODO: the following is potentially problematic for other go processes run on the same machine at the same time
    281296      // shouldn't be problematic because it is inherited only; normally only child processes are affected
     
    287302
    288303      //Constants
    289       int sortType = 0;
     304      int sortType = 0; // TODO what's sort type?
    290305      string problemTypeString = "benchmark";
    291306      int numProc = 12;
     
    304319      nTestData = Problem.ProblemData.TestPartition.Size;
    305320
    306       var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables);
    307       // TODO: does this work when input variables contain spaces?
    308       // is split on the go side, just for simpler passing
     321      if (problemData.AllowedInputVariables.Any(iv => iv.Contains(" ")))
     322        throw new NotSupportedException("PGE does not support variable names which contain spaces");
     323
     324      var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables);
    309325
    310326      IntPtr cIndepNames = Marshal.StringToHGlobalAnsi(inputVariableNames);
    311327      IntPtr cDependentNames = Marshal.StringToHGlobalAnsi(problemData.TargetVariable);
    312       // TODO: is it ok to use any variable here?
    313       // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" dep: x y z v w  indep: f(xs)
     328      // Dependent and independent names are the variables from the test/train data, e.g. from "Korns_02.trn": indep: x y z v w, dep: f(xs)
    314329
    315330      IntPtr cInitMethod = Marshal.StringToHGlobalAnsi(InitMethod);
     
    326341
    327342      AddTestData(cIndepNames, cDependentNames, testData, nTestData);
    328 
    329343      AddTrainData(cIndepNames, cDependentNames, trainData, nTrainData);
    330344
     
    334348      InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, cInitMethod, cGrowMethod, sortType);
    335349
    336       // cUsableVars: list of indices into dependent variables
     350      // cUseableVars: list of indices into independent variables
    337351      InitTreeParams(cRoots, cNodes, cNonTrig, cLeafs, cUseableVars, numberOfUseableVariables, MaxSize, MinSize, MaxDepth, MinDepth);
    338352
    339       // SearchVar: list of indices into independent variables (0 for first index)
    340       InitProblem(cName, MaxIterations, HitRatio, SearchVar, cProblemTypeString, numProc);
    341 
    342       var curItersItem = new IntValue();
     353     
     354      InitProblem(cName, MaxIterations, HitRatio,
     355        searchVar: numberOfUseableVariables,  // SearchVar: index of the dependent variable (0 for first index); by convention the target is placed at the end
     356        ProblemTypeString: cProblemTypeString, numProcs: numProc);
    343357
    344358      for (int iter = 1; iter <= MaxIterations; iter++) {
    345         curItersItem.Value = iter;
     359        iterationsResult.Value = iter;
    346360
    347361        int nResults = StepW();
    348362
    349         for (int iResult = 0; iResult < nResults; iResult++) { 
     363        for (int iResult = 0; iResult < nResults; iResult++) {
    350364          int nobestpush = 0;       //bool
    351365          int bestNewMinError = 0;  //bool
    352           int bestlen1 = 0; 
     366          int bestlen1 = 0;
    353367          int bestlen2 = 0;
    354368          int nCoeff = 0;
     
    356370
    357371          IntPtr eqn = GetStepResult(out nobestpush, out bestNewMinError, out bestlen1, out bestlen2, out testScore, out nCoeff);
    358 
    359372          string eqnStr = Marshal.PtrToStringAnsi(eqn);
    360373
    361           Console.WriteLine("Push/Pop (" + bestlen1 + "," + bestlen2 + ") " + eqnStr);
    362           StringBuilder sb = new StringBuilder("");
     374          double[] coeff = new double[nCoeff];
    363375          for (int iCoeff = 0; iCoeff < nCoeff; iCoeff++) {
    364             double coeffVal = GetCoeffResult();
    365             Console.WriteLine("Coeff: " + coeffVal);
    366             sb.Append(coeffVal + "; ");
     376            coeff[iCoeff] = GetCoeffResult();
    367377          }
    368 
    369           var curItersResult = new Result("Iteration " + iter + " " + iResult, curItersItem);
    370           var coeffItersResult = new Result("Coeff " + iter + " " + iResult, new StringValue(sb.ToString()));
    371 
    372           var bestQualityItem = new StringValue(eqnStr);
    373           var bestQualityResult = new Result("Best quality " + iter + " " + iResult, bestQualityItem);
    374           Results.Add(curItersResult);
    375           Results.Add(coeffItersResult);
    376           Results.Add(bestQualityResult);
     378          log.LogMessage("Push/Pop (" + iResult + ", " + bestlen1 + ", " + bestlen2 + ", " + testScore + ", noBestPush: " + (nobestpush > 0) + ", bestNewMin: " + (bestNewMinError > 0) + ") " + eqnStr + " coeff: " + string.Join(" ", coeff));
     379
     380          if (bestNewMinError > 0) {
     381            // update best quality
     382            bestTestScoreResult.Value = testScore;
     383            var sol = CreateSolution(problemData, eqnStr, coeff, problemData.AllowedInputVariables.ToArray());
     384            bestSolutionResult.Value = sol;
     385          }
     386          bestTestScoreRow.Values.Add(bestTestScoreResult.Value); // always add the current best test score to data row
     387          curTestScoreRow.Values.Add(testScore);
     388          len1Row.Values.Add(bestlen1);
     389          len2Row.Values.Add(bestlen2);
    377390        }
    378391
     
    401414    }
    402415
     416    private static readonly Regex varRegex = new Regex(@"X_(\d)+");
     417    private static readonly Regex coeffRegex = new Regex(@"C_(\d)+");
     418
     419    private IRegressionSolution CreateSolution(IRegressionProblemData problemData, string eqnStr, double[] coeff, string[] usableVariables) {
     420      // coefficients are named e.g. "C_0" in the PGE expressions
     421      // -> replace all patterns "C_\d" by the corresponding coefficients
     422      var match = coeffRegex.Match(eqnStr);
     423      while (match.Success) {
     424        var coeffIdx = int.Parse(match.Groups[1].ToString());
     425        eqnStr = eqnStr.Substring(0, match.Index) +
     426          "(" + coeff[coeffIdx].ToString(System.Globalization.CultureInfo.InvariantCulture) + ")" +
     427          eqnStr.Substring(match.Index + match.Length);
     428        match = coeffRegex.Match(eqnStr);
     429      }
     430
     431      // variables are named e.g. "X_0" in the PGE expressions
     432      // -> replace all patterns "X_\d" by the corresponding variable name
     433      match = varRegex.Match(eqnStr);
     434      while (match.Success) {
     435        var varIdx = int.Parse(match.Groups[1].ToString());
     436        eqnStr = eqnStr.Substring(0, match.Index) +
     437          "'" + usableVariables[varIdx] + "'" +
     438          eqnStr.Substring(match.Index + match.Length);
     439        match = varRegex.Match(eqnStr);
     440      }
     441
     442      var parser = new InfixExpressionParser();
     443      var tree = parser.Parse(eqnStr);
     444      var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     445      return model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     446    }
     447
    403448    public override bool SupportsPause {
    404449      get { return false; }
     
    423468      double[] val = new double[rows.Count() * dim];
    424469      int r = 0;
    425       foreach(var row in rows) {
     470      foreach (var row in rows) {
    426471        int c = 0;
    427         foreach(var var in variableNames) {
     472        foreach (var var in variableNames) {
    428473          val[r * dim + c] = ds.GetDoubleValue(var, r);
    429474          c++;
Note: See TracChangeset for help on using the changeset viewer.