Free cookie consent management tool by TermsFeed Policy Generator

Changeset 16315


Ignore:
Timestamp:
11/21/18 18:17:18 (6 years ago)
Author:
gkronber
Message:

#2929: worked on display of results, converting PGE expressions to HL Solutions.

Location:
branches/2929_PrioritizedGrammarEnumeration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.csproj

    r16199 r16315  
    7272      <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
    7373    </Reference>
     74    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     75      <SpecificVersion>False</SpecificVersion>
     76      <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath>
     77    </Reference>
    7478    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    7579      <SpecificVersion>False</SpecificVersion>
     
    103107  </ItemGroup>
    104108  <ItemGroup>
     109    <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj">
     110      <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project>
     111      <Name>HeuristicLab.Analysis-3.3</Name>
     112    </ProjectReference>
    105113    <ProjectReference Include="..\..\HeuristicLab.Problems.Instances\3.3\HeuristicLab.Problems.Instances-3.3.csproj">
    106114      <Project>{3540E29E-4793-49E7-8EE2-FEA7F61C3994}</Project>
  • branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/PGE.cs

    r16231 r16315  
    33using System.Linq;
    44using System.Runtime.InteropServices;
    5 using System.Text;
    65using System.Threading;
     6using HeuristicLab.Analysis;
    77using HeuristicLab.Common;
    88using HeuristicLab.Core;
    99using HeuristicLab.Data;
    10 using HeuristicLab.Optimization;
     10using System.Text.RegularExpressions;
     11using HeuristicLab.Optimization;
    1112using HeuristicLab.Parameters;
    1213using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    13 using HeuristicLab.Problems.DataAnalysis;     
     14using HeuristicLab.Problems.DataAnalysis;
     15using HeuristicLab.Problems.DataAnalysis.Symbolic;
     16using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    1417
    1518namespace PGE {
     
    1821  [Creatable(Category = CreatableAttribute.Categories.Algorithms, Priority = 999)]
    1922
    20   [StorableClass] 
     23  [StorableClass]
    2124  public unsafe class PGE : BasicAlgorithm {
    2225
     
    4952
    5053    #region parameter names
    51     private static readonly string MaxIterationsParameterName = "MaxIterations";                     
     54    private static readonly string MaxIterationsParameterName = "MaxIterations";
    5255    private static readonly string MaxGenParameterName = "MaxGen";
    5356    private static readonly string EvalrCountParameterName = "EvalrCount";
     
    5558    private static readonly string MinSizeParameterName = "MinSize";
    5659    private static readonly string MaxDepthParameterName = "MaxDepth";
    57     private static readonly string SearchVarParameterName = "SearchVar";
    5860    private static readonly string MinDepthParameterName = "MinDepth";
    5961    private static readonly string PgeRptEpochParameterName = "PgeRptEpoch";
     
    6264    private static readonly string PeelCntParameterName = "PeelCnt";
    6365    private static readonly string ZeroEpsilonParameterName = "ZeroEpsilon";
    64     private static readonly string HitRatioParameterName = "HitRatio";               
     66    private static readonly string HitRatioParameterName = "HitRatio";
    6567    private static readonly string InitMethodParameterName = "InitMethod";
    6668    private static readonly string GrowMethodParameterName = "GrowMethod";
     
    121123    }
    122124
    123     private IFixedValueParameter<IntValue> SearchVarParameter {
    124       get { return (IFixedValueParameter<IntValue>)Parameters[SearchVarParameterName]; }
    125     }
    126     public int SearchVar {
    127       get { return SearchVarParameter.Value.Value; }
    128       set { SearchVarParameter.Value.Value = value; }
    129     }
    130 
    131125    private IFixedValueParameter<IntValue> MinDepthParameter {
    132126      get { return (IFixedValueParameter<IntValue>)Parameters[MinDepthParameterName]; }
     
    240234      // algorithm parameters are shown in the GUI
    241235      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, new IntValue(50)));
    242       Parameters.Add(new FixedValueParameter<IntValue>(SearchVarParameterName, new IntValue(0)));
    243236      Parameters.Add(new FixedValueParameter<IntValue>(MinDepthParameterName, new IntValue(1)));
    244237      Parameters.Add(new FixedValueParameter<IntValue>(MaxDepthParameterName, new IntValue(6)));
     
    250243      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptCountParameterName, new IntValue(20)));
    251244      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptEpochParameterName, new IntValue(1)));
    252       Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));               
    253                                                                                                                                      
     245      Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));
     246
    254247      Parameters.Add(new FixedValueParameter<StringValue>(InitMethodParameterName, new StringValue("method1")));  // TODO Dropdown
    255248      Parameters.Add(new FixedValueParameter<StringValue>(GrowMethodParameterName, new StringValue("method1")));
     
    264257    }
    265258
    266  
     259
    267260    [StorableConstructor]
    268261    public PGE(bool deserializing) : base(deserializing) { }
    269262
    270    
    271     public PGE(PGE original, Cloner cloner) : base(original, cloner) {   
     263
     264    public PGE(PGE original, Cloner cloner) : base(original, cloner) {
    272265      // nothing to clone
    273266    }
     
    278271
    279272    protected override void Run(CancellationToken cancellationToken) {
     273      Log log = new Log();
     274      Results.Add(new Result("Log", log));
     275      var iterationsResult = new IntValue(0);
     276      Results.Add(new Result("Iteration", iterationsResult));
     277      var bestTestScoreResult = new IntValue(0); // TODO: why is test score an int?
     278      Results.Add(new Result("Best test score", bestTestScoreResult));
     279      var testScoresTable = new DataTable("Test scores");
     280      var bestTestScoreRow = new DataRow("Best test score");
     281      var curTestScoreRow = new DataRow("Current test score");
     282      testScoresTable.Rows.Add(bestTestScoreRow);
     283      testScoresTable.Rows.Add(curTestScoreRow);
     284      Results.Add(new Result("Test scores", testScoresTable));
     285      var lengthsTable = new DataTable("Lengths");
     286      var len1Row = new DataRow("Length 1");
     287      var len2Row = new DataRow("Length 2");
     288      lengthsTable.Rows.Add(len1Row);
     289      lengthsTable.Rows.Add(len2Row);
     290      Results.Add(new Result("Lengths", lengthsTable));
     291
     292      var bestSolutionResult = new Result("Best solution", typeof(IRegressionSolution));
     293      Results.Add(bestSolutionResult);
     294
    280295      // TODO: the following is potentially problematic for other go processes run on the same machine at the same time
    281296      // shouldn't be problematic bc is inherited only, normally only child processes are affected
     
    287302
    288303      //Constants
    289       int sortType = 0;
     304      int sortType = 0; // TODO what's sort type?
    290305      string problemTypeString = "benchmark";
    291306      int numProc = 12;
     
    304319      nTestData = Problem.ProblemData.TestPartition.Size;
    305320
    306       var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables);
    307       // TODO: does this work when input variables contain spaces?
    308       // is split on the go side, just for simpler passing
     321      if (problemData.AllowedInputVariables.Any(iv => iv.Contains(" ")))
     322        throw new NotSupportedException("PGE does not support variable names which contain spaces");
     323
     324      var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables);
    309325
    310326      IntPtr cIndepNames = Marshal.StringToHGlobalAnsi(inputVariableNames);
    311327      IntPtr cDependentNames = Marshal.StringToHGlobalAnsi(problemData.TargetVariable);
    312       // TODO: is it ok to use any variable here?
    313       // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" dep: x y z v w  indep: f(xs)
     328      // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" indep: x y z v w  dep: f(xs)
    314329
    315330      IntPtr cInitMethod = Marshal.StringToHGlobalAnsi(InitMethod);
     
    326341
    327342      AddTestData(cIndepNames, cDependentNames, testData, nTestData);
    328 
    329343      AddTrainData(cIndepNames, cDependentNames, trainData, nTrainData);
    330344
     
    334348      InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, cInitMethod, cGrowMethod, sortType);
    335349
    336       // cUsableVars: list of indices into dependent variables
     350      // cUsableVars: list of indices into independent variables
    337351      InitTreeParams(cRoots, cNodes, cNonTrig, cLeafs, cUseableVars, numberOfUseableVariables, MaxSize, MinSize, MaxDepth, MinDepth);
    338352
    339       // SearchVar: list of indices into independent variables (0 for first index)
    340       InitProblem(cName, MaxIterations, HitRatio, SearchVar, cProblemTypeString, numProc);
    341 
    342       var curItersItem = new IntValue();
     353     
     354      InitProblem(cName, MaxIterations, HitRatio,
     355        searchVar: numberOfUseableVariables,  // SearchVar: index of dependent variables (0 for first index), we use the convention to put the target at the end
     356        ProblemTypeString: cProblemTypeString, numProcs: numProc);
    343357
    344358      for (int iter = 1; iter <= MaxIterations; iter++) {
    345         curItersItem.Value = iter;
     359        iterationsResult.Value = iter;
    346360
    347361        int nResults = StepW();
    348362
    349         for (int iResult = 0; iResult < nResults; iResult++) { 
     363        for (int iResult = 0; iResult < nResults; iResult++) {
    350364          int nobestpush = 0;       //bool
    351365          int bestNewMinError = 0;  //bool
    352           int bestlen1 = 0; 
     366          int bestlen1 = 0;
    353367          int bestlen2 = 0;
    354368          int nCoeff = 0;
     
    356370
    357371          IntPtr eqn = GetStepResult(out nobestpush, out bestNewMinError, out bestlen1, out bestlen2, out testScore, out nCoeff);
    358 
    359372          string eqnStr = Marshal.PtrToStringAnsi(eqn);
    360373
    361           Console.WriteLine("Push/Pop (" + bestlen1 + "," + bestlen2 + ") " + eqnStr);
    362           StringBuilder sb = new StringBuilder("");
     374          double[] coeff = new double[nCoeff];
    363375          for (int iCoeff = 0; iCoeff < nCoeff; iCoeff++) {
    364             double coeffVal = GetCoeffResult();
    365             Console.WriteLine("Coeff: " + coeffVal);
    366             sb.Append(coeffVal + "; ");
     376            coeff[iCoeff] = GetCoeffResult();
    367377          }
    368 
    369           var curItersResult = new Result("Iteration " + iter + " " + iResult, curItersItem);
    370           var coeffItersResult = new Result("Coeff " + iter + " " + iResult, new StringValue(sb.ToString()));
    371 
    372           var bestQualityItem = new StringValue(eqnStr);
    373           var bestQualityResult = new Result("Best quality " + iter + " " + iResult, bestQualityItem);
    374           Results.Add(curItersResult);
    375           Results.Add(coeffItersResult);
    376           Results.Add(bestQualityResult);
     378          log.LogMessage("Push/Pop (" + iResult + ", " + bestlen1 + ", " + bestlen2 + ", " + testScore + ", noBestPush: " + (nobestpush > 0) + ", bestNewMin: " + (bestNewMinError > 0) + ") " + eqnStr + " coeff: " + string.Join(" ", coeff));
     379
     380          if (bestNewMinError > 0) {
     381            // update best quality
     382            bestTestScoreResult.Value = testScore;
     383            var sol = CreateSolution(problemData, eqnStr, coeff, problemData.AllowedInputVariables.ToArray());
     384            bestSolutionResult.Value = sol;
     385          }
     386          bestTestScoreRow.Values.Add(bestTestScoreResult.Value); // always add the current best test score to data row
     387          curTestScoreRow.Values.Add(testScore);
     388          len1Row.Values.Add(bestlen1);
     389          len2Row.Values.Add(bestlen2);
    377390        }
    378391
     
    401414    }
    402415
     416    private static readonly Regex varRegex = new Regex(@"X_(\d)+");
     417    private static readonly Regex coeffRegex = new Regex(@"C_(\d)+");
     418
     419    private IRegressionSolution CreateSolution(IRegressionProblemData problemData, string eqnStr, double[] coeff, string[] usableVariables) {
     420      // coefficients are named e.g. "C_0" in the PGE expressions
     421      // -> replace all patterns "C_\d" by the corresponding coefficients
     422      var match = coeffRegex.Match(eqnStr);
     423      while (match.Success) {
     424        var coeffIdx = int.Parse(match.Groups[1].ToString());
     425        eqnStr = eqnStr.Substring(0, match.Index) +
     426          "(" + coeff[coeffIdx].ToString(System.Globalization.CultureInfo.InvariantCulture) + ")" +
     427          eqnStr.Substring(match.Index + match.Length);
     428        match = coeffRegex.Match(eqnStr);
     429      }
     430
     431      // variables are named e.g. "X_0" in the PGE expressions
     432      // -> replace all patterns "X_\d" by the corresponding variable name
     433      match = varRegex.Match(eqnStr);
     434      while (match.Success) {
     435        var varIdx = int.Parse(match.Groups[1].ToString());
     436        eqnStr = eqnStr.Substring(0, match.Index) +
     437          "'" + usableVariables[varIdx] + "'" +
     438          eqnStr.Substring(match.Index + match.Length);
     439        match = varRegex.Match(eqnStr);
     440      }
     441
     442      var parser = new InfixExpressionParser();
     443      var tree = parser.Parse(eqnStr);
     444      var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     445      return model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     446    }
     447
    403448    public override bool SupportsPause {
    404449      get { return false; }
     
    423468      double[] val = new double[rows.Count() * dim];
    424469      int r = 0;
    425       foreach(var row in rows) {
     470      foreach (var row in rows) {
    426471        int c = 0;
    427         foreach(var var in variableNames) {
     472        foreach (var var in variableNames) {
    428473          val[r * dim + c] = ds.GetDoubleValue(var, r);
    429474          c++;
  • branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/Plugin.cs

    r15991 r16315  
    33namespace HeuristicLab.Algorithms.DataAnalysis.PGE {
    44  [Plugin("HeuristicLab.Algorithms.DataAnalysis.PGE", "3.3")]
    5   [PluginFile("HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll", PluginFileType.Assembly)] // each plugin represents a collection of files. The minimum is one file; the assembly.
     5  [PluginFile("HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll", PluginFileType.Assembly)]
     6  [PluginFile("go-pge.dll", PluginFileType.NativeDll)]
    67
    7   // Usually your plugin references other HeuristicLab dlls. If you are referencing files (e.g. assemblies)
    8   // from another plugin the corresponding plugin should be added as a dependency.
    9   // Usually, if this information is incorrect you will still be able to use you plugin, but HL functionality
    10   // which uses plugin dependency resolution will not work correctly. For instance if plugin dependencies are
    11   // not correct then your plugin cannot be used on HeuristicLab.Hive
    12   //
     8  [PluginDependency("HeuristicLab.Analysis", "3.3")]
    139  [PluginDependency("HeuristicLab.Collections", "3.3")]
    1410  [PluginDependency("HeuristicLab.Common", "3.3")]
     
    2016  [PluginDependency("HeuristicLab.Persistence", "3.3")]
    2117  [PluginDependency("HeuristicLab.Problems.DataAnalysis", "3.4")]
     18  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")]
    2219  [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")]
     20  [PluginDependency("HeuristicLab.Problems.Instances", "3.3")]
    2321  [PluginDependency("HeuristicLab.Random", "3.3")]
    2422
    25   // HL plugin infrastructure discovers plugins on startup by trying to load all .dll and .exe files and looking for
    26   // classes deriving from PluginBase. The meta-information for the plugin class is specified in the attributes
    27   // above and used by plugin infrastructure primarily for plugin dependency resolution.
    28 
    29   // Steps:
    30   // (1) Check out HL source code (e.g. the trunk version)
    31   // (2) Build external libraries HeuristicLab.ExtLibs.sln using the Build.cmd (in the path of the HL source code)
    32   // (3) Build HeuristicLab 3.3.sln using the Build.cmd
    33   // (4) Check that the output file has been added to the HL binaries folder (hl/bin/HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll)
    34   // (5) Start hl/bin/HeuristicLab.exe and open the "Plugin Manager".
    35   //      Make sure your EmptyPlugin appears in the list of loaded plugins
    3623  public class Plugin : PluginBase {
    3724  }
  • branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs

    r15583 r16315  
    4242  /// Expr          = ['-' | '+'] Term { '+' Term | '-' Term }
    4343  /// Term          = Fact { '*' Fact | '/' Fact }
    44   /// Fact          = '(' Expr ')'
    45   ///                 | 'LAG' '(' varId ',' ['+' | '-' ] number ')'
    46   ///                 | funcId '(' ArgList ')'
    47   ///                 | VarExpr | number
     44  /// Fact          = SimpleFact [ '^' SimpleFact ]
     45  /// SimpleFact    = '(' Expr ')'
     46  ///                 | '{' Expr '}'
     47  ///                 | 'LAG' '(' varId ',' ['+' | '-' ] number ')'
     48  ///                 | funcId '(' ArgList ')'
     49  ///                 | VarExpr
     50  ///                 | number
    4851  /// ArgList       = Expr { ',' Expr }
    4952  /// VarExpr       = varId OptFactorPart
     
    9598        { "*", new Multiplication()},
    9699        { "-", new Subtraction()},
     100        { "^", new Power() },
    97101        { "EXP", new Exponential()},
    98102        { "LOG", new Logarithm()},
     
    167171            && str[pos] != '*'
    168172            && str[pos] != '/'
     173            && str[pos] != '^'
    169174            && str[pos] != ')'
    170175            && str[pos] != ']'
     176            && str[pos] != '}'
    171177            && str[pos] != ',') {
    172178            sb.Append(str[pos]);
     
    227233          pos++;
    228234          yield return new Token { TokenType = TokenType.Operator, strVal = "*" };
     235        } else if (str[pos] == '^') {
     236          pos++;
     237          yield return new Token { TokenType = TokenType.Operator, strVal = "^" };
    229238        } else if (str[pos] == '(') {
    230239          pos++;
     
    239248          pos++;
    240249          yield return new Token { TokenType = TokenType.RightBracket, strVal = "]" };
     250        } else if (str[pos] == '{') {
     251          pos++;
     252          yield return new Token { TokenType = TokenType.LeftPar, strVal = "{" };
     253        } else if (str[pos] == '}') {
     254          pos++;
     255          yield return new Token { TokenType = TokenType.RightPar, strVal = "}" };
    241256        } else if (str[pos] == '=') {
    242257          pos++;
     
    360375    }
    361376
    362     /// Fact          = '(' Expr ')'
    363     ///                 | 'LAG' '(' varId ',' ['+' | '-' ] number ')'
    364     ///                 | funcId '(' ArgList ')'
    365     ///                 | VarExpr | number
     377    // Fact = SimpleFact ['^' SimpleFact]
     378    private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) {
     379      var expr = ParseSimpleFact(tokens);
     380      var next = tokens.Peek();
     381      if (next.TokenType == TokenType.Operator && next.strVal == "^") {
     382        tokens.Dequeue(); // skip;
     383
     384        var p = GetSymbol("^").CreateTreeNode();
     385        p.AddSubtree(expr);
     386        p.AddSubtree(ParseSimpleFact(tokens));
     387        expr = p;
     388      }
     389      return expr;
     390    }
     391
     392
     393    /// SimpleFact   = '(' Expr ')'
     394    ///                 | '{' Expr '}'
     395    ///                 | 'LAG' '(' varId ',' ['+' | '-' ] number ')'
     396    ///                 | funcId '(' ArgList ')'
     397    ///                 | VarExpr
     398    ///                 | number
    366399    /// ArgList       = Expr { ',' Expr }
    367400    /// VarExpr       = varId OptFactorPart
     
    370403    /// varVal        =  ident | ' ident ' | " ident "
    371404    /// ident         =  '_' | letter { '_' | letter | digit }
    372     private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) {
     405    private ISymbolicExpressionTreeNode ParseSimpleFact(Queue<Token> tokens) {
    373406      var next = tokens.Peek();
    374407      if (next.TokenType == TokenType.LeftPar) {
    375         tokens.Dequeue();
     408        var initPar = tokens.Dequeue(); // match par type
    376409        var expr = ParseExpr(tokens);
    377410        var rPar = tokens.Dequeue();
    378411        if (rPar.TokenType != TokenType.RightPar)
    379           throw new ArgumentException("expected )");
     412          throw new ArgumentException("expected closing parenthesis");
     413        if (initPar.strVal == "(" && rPar.strVal == "}")
     414          throw new ArgumentException("expected closing )");
     415        if (initPar.strVal == "{" && rPar.strVal == ")")
     416          throw new ArgumentException("expected closing }");
    380417        return expr;
    381418      } else if (next.TokenType == TokenType.Identifier) {
     
    424461          if (rPar.TokenType != TokenType.RightPar)
    425462            throw new ArgumentException("expected )");
     463
    426464
    427465          return funcNode;
Note: See TracChangeset for help on using the changeset viewer.