Changeset 16315
- Timestamp: 11/21/18 18:17:18 (6 years ago)
- Location: branches/2929_PrioritizedGrammarEnumeration
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.csproj
r16199 r16315 72 72 <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath> 73 73 </Reference> 74 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 75 <SpecificVersion>False</SpecificVersion> 76 <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath> 77 </Reference> 74 78 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 75 79 <SpecificVersion>False</SpecificVersion> … … 103 107 </ItemGroup> 104 108 <ItemGroup> 109 <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj"> 110 <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project> 111 <Name>HeuristicLab.Analysis-3.3</Name> 112 </ProjectReference> 105 113 <ProjectReference Include="..\..\HeuristicLab.Problems.Instances\3.3\HeuristicLab.Problems.Instances-3.3.csproj"> 106 114 <Project>{3540E29E-4793-49E7-8EE2-FEA7F61C3994}</Project> -
branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/PGE.cs
r16231 r16315 3 3 using System.Linq; 4 4 using System.Runtime.InteropServices; 5 using System.Text;6 5 using System.Threading; 6 using HeuristicLab.Analysis; 7 7 using HeuristicLab.Common; 8 8 using HeuristicLab.Core; 9 9 using HeuristicLab.Data; 10 using HeuristicLab.Optimization; 10 using System.Text.RegularExpressions; 11 using HeuristicLab.Optimization; 11 12 using HeuristicLab.Parameters; 12 13 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 13 using HeuristicLab.Problems.DataAnalysis; 14 using HeuristicLab.Problems.DataAnalysis; 15 using HeuristicLab.Problems.DataAnalysis.Symbolic; 16 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; 14 17 15 18 namespace PGE { … … 18 21 [Creatable(Category = CreatableAttribute.Categories.Algorithms, Priority = 999)] 19 22 20 [StorableClass] 23 [StorableClass] 21 24 public unsafe class PGE : BasicAlgorithm { 22 25 … … 49 52 50 53 #region parameter names 51 private static readonly string MaxIterationsParameterName = "MaxIterations"; 54 private static readonly string MaxIterationsParameterName = "MaxIterations"; 52 55 private static readonly string MaxGenParameterName = "MaxGen"; 53 56 private static readonly string EvalrCountParameterName = "EvalrCount"; … … 55 58 private static readonly string MinSizeParameterName = "MinSize"; 56 59 private static readonly string MaxDepthParameterName = "MaxDepth"; 57 private static readonly string SearchVarParameterName = "SearchVar";58 60 private static readonly string MinDepthParameterName = "MinDepth"; 59 61 private static readonly string PgeRptEpochParameterName = "PgeRptEpoch"; … … 62 64 private static readonly string PeelCntParameterName = "PeelCnt"; 63 65 private static readonly string ZeroEpsilonParameterName = "ZeroEpsilon"; 64 private static readonly string HitRatioParameterName = "HitRatio"; 66 private static readonly string HitRatioParameterName = "HitRatio"; 65 67 private static readonly string InitMethodParameterName = "InitMethod"; 66 68 
private static readonly string GrowMethodParameterName = "GrowMethod"; … … 121 123 } 122 124 123 private IFixedValueParameter<IntValue> SearchVarParameter {124 get { return (IFixedValueParameter<IntValue>)Parameters[SearchVarParameterName]; }125 }126 public int SearchVar {127 get { return SearchVarParameter.Value.Value; }128 set { SearchVarParameter.Value.Value = value; }129 }130 131 125 private IFixedValueParameter<IntValue> MinDepthParameter { 132 126 get { return (IFixedValueParameter<IntValue>)Parameters[MinDepthParameterName]; } … … 240 234 // algorithm parameters are shown in the GUI 241 235 Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, new IntValue(50))); 242 Parameters.Add(new FixedValueParameter<IntValue>(SearchVarParameterName, new IntValue(0)));243 236 Parameters.Add(new FixedValueParameter<IntValue>(MinDepthParameterName, new IntValue(1))); 244 237 Parameters.Add(new FixedValueParameter<IntValue>(MaxDepthParameterName, new IntValue(6))); … … 250 243 Parameters.Add(new FixedValueParameter<IntValue>(PgeRptCountParameterName, new IntValue(20))); 251 244 Parameters.Add(new FixedValueParameter<IntValue>(PgeRptEpochParameterName, new IntValue(1))); 252 Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200))); 253 245 Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200))); 246 254 247 Parameters.Add(new FixedValueParameter<StringValue>(InitMethodParameterName, new StringValue("method1"))); // TODO Dropdown 255 248 Parameters.Add(new FixedValueParameter<StringValue>(GrowMethodParameterName, new StringValue("method1"))); … … 264 257 } 265 258 266 259 267 260 [StorableConstructor] 268 261 public PGE(bool deserializing) : base(deserializing) { } 269 262 270 271 public PGE(PGE original, Cloner cloner) : base(original, cloner) { 263 264 public PGE(PGE original, Cloner cloner) : base(original, cloner) { 272 265 // nothing to clone 273 266 } … … 278 271 279 272 protected 
override void Run(CancellationToken cancellationToken) { 273 Log log = new Log(); 274 Results.Add(new Result("Log", log)); 275 var iterationsResult = new IntValue(0); 276 Results.Add(new Result("Iteration", iterationsResult)); 277 var bestTestScoreResult = new IntValue(0); // TODO: why is test score an int? 278 Results.Add(new Result("Best test score", bestTestScoreResult)); 279 var testScoresTable = new DataTable("Test scores"); 280 var bestTestScoreRow = new DataRow("Best test score"); 281 var curTestScoreRow = new DataRow("Current test score"); 282 testScoresTable.Rows.Add(bestTestScoreRow); 283 testScoresTable.Rows.Add(curTestScoreRow); 284 Results.Add(new Result("Test scores", testScoresTable)); 285 var lengthsTable = new DataTable("Lengths"); 286 var len1Row = new DataRow("Length 1"); 287 var len2Row = new DataRow("Length 2"); 288 lengthsTable.Rows.Add(len1Row); 289 lengthsTable.Rows.Add(len2Row); 290 Results.Add(new Result("Lengths", lengthsTable)); 291 292 var bestSolutionResult = new Result("Best solution", typeof(IRegressionSolution)); 293 Results.Add(bestSolutionResult); 294 280 295 // TODO: the following is potentially problematic for other go processes run on the same machine at the same time 281 296 // shouldn't be problematic bc is inherited only, normally only child processes are affected … … 287 302 288 303 //Constants 289 int sortType = 0; 304 int sortType = 0; // TODO what's sort type? 290 305 string problemTypeString = "benchmark"; 291 306 int numProc = 12; … … 304 319 nTestData = Problem.ProblemData.TestPartition.Size; 305 320 306 var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables); 307 // TODO: does this work when input variables contain spaces? 
308 // is split on the go side, just for simpler passing 321 if (problemData.AllowedInputVariables.Any(iv => iv.Contains(" "))) 322 throw new NotSupportedException("PGE does not support variable names which contain spaces"); 323 324 var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables); 309 325 310 326 IntPtr cIndepNames = Marshal.StringToHGlobalAnsi(inputVariableNames); 311 327 IntPtr cDependentNames = Marshal.StringToHGlobalAnsi(problemData.TargetVariable); 312 // TODO: is it ok to use any variable here? 313 // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" dep: x y z v w indep: f(xs) 328 // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" indep: x y z v w dep: f(xs) 314 329 315 330 IntPtr cInitMethod = Marshal.StringToHGlobalAnsi(InitMethod); … … 326 341 327 342 AddTestData(cIndepNames, cDependentNames, testData, nTestData); 328 329 343 AddTrainData(cIndepNames, cDependentNames, trainData, nTrainData); 330 344 … … 334 348 InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, cInitMethod, cGrowMethod, sortType); 335 349 336 // cUsableVars: list of indices into dependent variables350 // cUsableVars: list of indices into independent variables 337 351 InitTreeParams(cRoots, cNodes, cNonTrig, cLeafs, cUseableVars, numberOfUseableVariables, MaxSize, MinSize, MaxDepth, MinDepth); 338 352 339 // SearchVar: list of indices into independent variables (0 for first index)340 InitProblem(cName, MaxIterations, HitRatio, SearchVar, cProblemTypeString, numProc);341 342 var curItersItem = new IntValue();353 354 InitProblem(cName, MaxIterations, HitRatio, 355 searchVar: numberOfUseableVariables, // SearchVar: index of dependent variables (0 for first index), we use the convention to put the target at the end 356 ProblemTypeString: cProblemTypeString, numProcs: numProc); 343 357 344 358 for (int iter = 1; iter <= 
MaxIterations; iter++) { 345 curItersItem.Value = iter;359 iterationsResult.Value = iter; 346 360 347 361 int nResults = StepW(); 348 362 349 for (int iResult = 0; iResult < nResults; iResult++) { 363 for (int iResult = 0; iResult < nResults; iResult++) { 350 364 int nobestpush = 0; //bool 351 365 int bestNewMinError = 0; //bool 352 int bestlen1 = 0; 366 int bestlen1 = 0; 353 367 int bestlen2 = 0; 354 368 int nCoeff = 0; … … 356 370 357 371 IntPtr eqn = GetStepResult(out nobestpush, out bestNewMinError, out bestlen1, out bestlen2, out testScore, out nCoeff); 358 359 372 string eqnStr = Marshal.PtrToStringAnsi(eqn); 360 373 361 Console.WriteLine("Push/Pop (" + bestlen1 + "," + bestlen2 + ") " + eqnStr); 362 StringBuilder sb = new StringBuilder(""); 374 double[] coeff = new double[nCoeff]; 363 375 for (int iCoeff = 0; iCoeff < nCoeff; iCoeff++) { 364 double coeffVal = GetCoeffResult(); 365 Console.WriteLine("Coeff: " + coeffVal); 366 sb.Append(coeffVal + "; "); 376 coeff[iCoeff] = GetCoeffResult(); 367 377 } 368 369 var curItersResult = new Result("Iteration " + iter + " " + iResult, curItersItem); 370 var coeffItersResult = new Result("Coeff " + iter + " " + iResult, new StringValue(sb.ToString())); 371 372 var bestQualityItem = new StringValue(eqnStr); 373 var bestQualityResult = new Result("Best quality " + iter + " " + iResult, bestQualityItem); 374 Results.Add(curItersResult); 375 Results.Add(coeffItersResult); 376 Results.Add(bestQualityResult); 378 log.LogMessage("Push/Pop (" + iResult + ", " + bestlen1 + ", " + bestlen2 + ", " + testScore + ", noBestPush: " + (nobestpush > 0) + ", bestNewMin: " + (bestNewMinError > 0) + ") " + eqnStr + " coeff: " + string.Join(" ", coeff)); 379 380 if (bestNewMinError > 0) { 381 // update best quality 382 bestTestScoreResult.Value = testScore; 383 var sol = CreateSolution(problemData, eqnStr, coeff, problemData.AllowedInputVariables.ToArray()); 384 bestSolutionResult.Value = sol; 385 } 386 
bestTestScoreRow.Values.Add(bestTestScoreResult.Value); // always add the current best test score to data row 387 curTestScoreRow.Values.Add(testScore); 388 len1Row.Values.Add(bestlen1); 389 len2Row.Values.Add(bestlen2); 377 390 } 378 391 … … 401 414 } 402 415 416 private static readonly Regex varRegex = new Regex(@"X_(\d)+"); 417 private static readonly Regex coeffRegex = new Regex(@"C_(\d)+"); 418 419 private IRegressionSolution CreateSolution(IRegressionProblemData problemData, string eqnStr, double[] coeff, string[] usableVariables) { 420 // coefficients are named e.g. "C_0" in the PGE expressions 421 // -> replace all patterns "C_\d" by the corresponding coefficients 422 var match = coeffRegex.Match(eqnStr); 423 while (match.Success) { 424 var coeffIdx = int.Parse(match.Groups[1].ToString()); 425 eqnStr = eqnStr.Substring(0, match.Index) + 426 "(" + coeff[coeffIdx].ToString(System.Globalization.CultureInfo.InvariantCulture) + ")" + 427 eqnStr.Substring(match.Index + match.Length); 428 match = coeffRegex.Match(eqnStr); 429 } 430 431 // variables are named e.g. 
"X_0" in the PGE expressions 432 // -> replace all patterns "X_\d" by the corresponding variable name 433 match = varRegex.Match(eqnStr); 434 while (match.Success) { 435 var varIdx = int.Parse(match.Groups[1].ToString()); 436 eqnStr = eqnStr.Substring(0, match.Index) + 437 "'" + usableVariables[varIdx] + "'" + 438 eqnStr.Substring(match.Index + match.Length); 439 match = varRegex.Match(eqnStr); 440 } 441 442 var parser = new InfixExpressionParser(); 443 var tree = parser.Parse(eqnStr); 444 var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()); 445 return model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); 446 } 447 403 448 public override bool SupportsPause { 404 449 get { return false; } … … 423 468 double[] val = new double[rows.Count() * dim]; 424 469 int r = 0; 425 foreach (var row in rows) {470 foreach (var row in rows) { 426 471 int c = 0; 427 foreach (var var in variableNames) {472 foreach (var var in variableNames) { 428 473 val[r * dim + c] = ds.GetDoubleValue(var, r); 429 474 c++; -
branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/Plugin.cs
r15991 r16315 3 3 namespace HeuristicLab.Algorithms.DataAnalysis.PGE { 4 4 [Plugin("HeuristicLab.Algorithms.DataAnalysis.PGE", "3.3")] 5 [PluginFile("HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll", PluginFileType.Assembly)] // each plugin represents a collection of files. The minimum is one file; the assembly. 5 [PluginFile("HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll", PluginFileType.Assembly)] 6 [PluginFile("go-pge.dll", PluginFileType.NativeDll)] 6 7 7 // Usually your plugin references other HeuristicLab dlls. If you are referencing files (e.g. assemblies) 8 // from another plugin the corresponding plugin should be added as a dependency. 9 // Usually, if this information is incorrect you will still be able to use you plugin, but HL functionality 10 // which uses plugin dependency resolution will not work correctly. For instance if plugin dependencies are 11 // not correct then your plugin cannot be used on HeuristicLab.Hive 12 // 8 [PluginDependency("HeuristicLab.Analysis", "3.3")] 13 9 [PluginDependency("HeuristicLab.Collections", "3.3")] 14 10 [PluginDependency("HeuristicLab.Common", "3.3")] … … 20 16 [PluginDependency("HeuristicLab.Persistence", "3.3")] 21 17 [PluginDependency("HeuristicLab.Problems.DataAnalysis", "3.4")] 18 [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic", "3.4")] 22 19 [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")] 20 [PluginDependency("HeuristicLab.Problems.Instances", "3.3")] 23 21 [PluginDependency("HeuristicLab.Random", "3.3")] 24 22 25 // HL plugin infrastructure discovers plugins on startup by trying to load all .dll and .exe files and looking for26 // classes deriving from PluginBase. The meta-information for the plugin class is specified in the attributes27 // above and used by plugin infrastructure primarily for plugin dependency resolution.28 29 // Steps:30 // (1) Check out HL source code (e.g. 
the trunk version)31 // (2) Build external libraries HeuristicLab.ExtLibs.sln using the Build.cmd (in the path of the HL source code)32 // (3) Build HeuristicLab 3.3.sln using the Build.cmd33 // (4) Check that the output file has been added to the HL binaries folder (hl/bin/HeuristicLab.Algorithms.DataAnalysis.PGE-3.3.dll)34 // (5) Start hl/bin/HeuristicLab.exe and open the "Plugin Manager".35 // Make sure your EmptyPlugin appears in the list of loaded plugins36 23 public class Plugin : PluginBase { 37 24 } -
branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs
r15583 r16315 42 42 /// Expr = ['-' | '+'] Term { '+' Term | '-' Term } 43 43 /// Term = Fact { '*' Fact | '/' Fact } 44 /// Fact = '(' Expr ')' 45 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ')' 46 /// | funcId '(' ArgList ')' 47 /// | VarExpr | number 44 /// Fact = SimpleFact [ '^' SimpleFact ] 45 /// SimpleFact = '(' Expr ')' 46 /// | '{' Expr '}' 47 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ') 48 /// | funcId '(' ArgList ')' 49 /// | VarExpr 50 /// | number 48 51 /// ArgList = Expr { ',' Expr } 49 52 /// VarExpr = varId OptFactorPart … … 95 98 { "*", new Multiplication()}, 96 99 { "-", new Subtraction()}, 100 { "^", new Power() }, 97 101 { "EXP", new Exponential()}, 98 102 { "LOG", new Logarithm()}, … … 167 171 && str[pos] != '*' 168 172 && str[pos] != '/' 173 && str[pos] != '^' 169 174 && str[pos] != ')' 170 175 && str[pos] != ']' 176 && str[pos] != '}' 171 177 && str[pos] != ',') { 172 178 sb.Append(str[pos]); … … 227 233 pos++; 228 234 yield return new Token { TokenType = TokenType.Operator, strVal = "*" }; 235 } else if (str[pos] == '^') { 236 pos++; 237 yield return new Token { TokenType = TokenType.Operator, strVal = "^" }; 229 238 } else if (str[pos] == '(') { 230 239 pos++; … … 239 248 pos++; 240 249 yield return new Token { TokenType = TokenType.RightBracket, strVal = "]" }; 250 } else if (str[pos] == '{') { 251 pos++; 252 yield return new Token { TokenType = TokenType.LeftPar, strVal = "{" }; 253 } else if (str[pos] == '}') { 254 pos++; 255 yield return new Token { TokenType = TokenType.RightPar, strVal = "}" }; 241 256 } else if (str[pos] == '=') { 242 257 pos++; … … 360 375 } 361 376 362 /// Fact = '(' Expr ')' 363 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ')' 364 /// | funcId '(' ArgList ')' 365 /// | VarExpr | number 377 // Fact = SimpleFact ['^' SimpleFact] 378 private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) { 379 var expr = ParseSimpleFact(tokens); 380 var next = tokens.Peek(); 381 if (next.TokenType == 
TokenType.Operator && next.strVal == "^") { 382 tokens.Dequeue(); // skip; 383 384 var p = GetSymbol("^").CreateTreeNode(); 385 p.AddSubtree(expr); 386 p.AddSubtree(ParseSimpleFact(tokens)); 387 expr = p; 388 } 389 return expr; 390 } 391 392 393 /// SimpleFact = '(' Expr ')' 394 /// | '{' Expr '}' 395 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ')' 396 /// | funcId '(' ArgList ') 397 /// | VarExpr 398 /// | number 366 399 /// ArgList = Expr { ',' Expr } 367 400 /// VarExpr = varId OptFactorPart … … 370 403 /// varVal = ident | ' ident ' | " ident " 371 404 /// ident = '_' | letter { '_' | letter | digit } 372 private ISymbolicExpressionTreeNode Parse Fact(Queue<Token> tokens) {405 private ISymbolicExpressionTreeNode ParseSimpleFact(Queue<Token> tokens) { 373 406 var next = tokens.Peek(); 374 407 if (next.TokenType == TokenType.LeftPar) { 375 tokens.Dequeue();408 var initPar = tokens.Dequeue(); // match par type 376 409 var expr = ParseExpr(tokens); 377 410 var rPar = tokens.Dequeue(); 378 411 if (rPar.TokenType != TokenType.RightPar) 379 throw new ArgumentException("expected )"); 412 throw new ArgumentException("expected closing parenthesis"); 413 if (initPar.strVal == "(" && rPar.strVal == "}") 414 throw new ArgumentException("expected closing )"); 415 if (initPar.strVal == "{" && rPar.strVal == ")") 416 throw new ArgumentException("expected closing }"); 380 417 return expr; 381 418 } else if (next.TokenType == TokenType.Identifier) { … … 424 461 if (rPar.TokenType != TokenType.RightPar) 425 462 throw new ArgumentException("expected )"); 463 426 464 427 465 return funcNode;
Note: See TracChangeset for help on using the changeset viewer.