using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text.RegularExpressions;
using System.Threading;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;

namespace PGE {
  /// <summary>
  /// HeuristicLab algorithm that drives the native "go-pge.dll" library through P/Invoke:
  /// it hands the training/test data and the search configuration to the library, steps the
  /// search, and converts every reported expression into an <see cref="IRegressionSolution"/>.
  /// </summary>
  [Item(Name = "Priorizied Grammar Enumeration (PGE)", Description = "Priorizied grammar enumeration algorithm. Worm, T. and Chiu K., 'Prioritized Grammar Enumeration: Symbolic Regression by Dynamic Programming'. GECCO 2013")]
  [Creatable(Category = CreatableAttribute.Categories.Algorithms, Priority = 999)]
  [StorableClass]
  public unsafe class PGE : BasicAlgorithm {

    #region native interface (go-pge.dll)
    [DllImport("go-pge.dll", EntryPoint = "addTestData", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void AddTestData([MarshalAs(UnmanagedType.AnsiBStr)] string indepNames, [MarshalAs(UnmanagedType.AnsiBStr)] string depndNames, double[] matrix, int nEntries);

    [DllImport("go-pge.dll", EntryPoint = "addTrainData", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void AddTrainData([MarshalAs(UnmanagedType.AnsiBStr)] string indepNames, [MarshalAs(UnmanagedType.AnsiBStr)] string depndNames, double[] matrix, int nEntries);

    [DllImport("go-pge.dll", EntryPoint = "initSearch", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitSearch(int maxGen, int pgeRptEpoch, int pgeRptCount, int pgeArchiveCap, int peelCnt, int evalrCount, double zeroEpsilon, [MarshalAs(UnmanagedType.AnsiBStr)] string initMethod, [MarshalAs(UnmanagedType.AnsiBStr)] string growMethod, int sortType);

    [DllImport("go-pge.dll", EntryPoint = "initTreeParams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitTreeParams([MarshalAs(UnmanagedType.AnsiBStr)] string roots, [MarshalAs(UnmanagedType.AnsiBStr)] string nodes, [MarshalAs(UnmanagedType.AnsiBStr)] string nonTrig, [MarshalAs(UnmanagedType.AnsiBStr)] string leafs, int numUsableVars, int maxSize, int minSize, int maxDepth, int minDepth);

    [DllImport("go-pge.dll", EntryPoint = "initProblem", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitProblem([MarshalAs(UnmanagedType.AnsiBStr)] string name, int maxIter, double hitRatio, int searchVar, [MarshalAs(UnmanagedType.AnsiBStr)] string ProblemTypeString, int numProcs);

    [DllImport("go-pge.dll", EntryPoint = "stepW", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern int StepW();

    [DllImport("go-pge.dll", EntryPoint = "getStepResult", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr GetStepResult(out int testscore, out int nCoeff);

    [DllImport("go-pge.dll", EntryPoint = "getCoeffResult", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern double GetCoeffResult();
    #endregion

    public override Type ProblemType { get { return typeof(RegressionProblem); } }
    public new RegressionProblem Problem { get { return (RegressionProblem)base.Problem; } }

    #region parameter names
    private static readonly string MaxIterationsParameterName = "MaxIterations";
    private static readonly string MaxGenParameterName = "MaxGen";
    private static readonly string EvalrCountParameterName = "EvalrCount";
    private static readonly string MaxSizeParameterName = "MaxSize";
    private static readonly string MinSizeParameterName = "MinSize";
    private static readonly string MaxDepthParameterName = "MaxDepth";
    private static readonly string MinDepthParameterName = "MinDepth";
    private static readonly string PgeRptEpochParameterName = "PgeRptEpoch";
    private static readonly string PgeRptCountParameterName = "PgeRptCount";
    private static readonly string PgeArchiveCapParameterName = "PgeArchiveCap";
    private static readonly string PeelCntParameterName = "PeelCnt";
    private static readonly string ZeroEpsilonParameterName = "ZeroEpsilon";
    private static readonly string HitRatioParameterName = "HitRatio";
    private static readonly string InitMethodParameterName = "InitMethod";
    private static readonly string GrowMethodParameterName = "GrowMethod";
    private static readonly string RootsParameterName = "Roots";
    private static readonly string NodesParameterName = "Nodes";
    private static readonly string NonTrigParameterName = "NonTrig";
    private static readonly string LeafsParameterName = "Leafs";
    #endregion

    #region parameters
    // Each parameter is exposed twice: as the typed parameter object looked up by name in the
    // Parameters collection, and as a plain property that reads/writes the wrapped value.
    private IFixedValueParameter<IntValue> MaxIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxIterationsParameterName]; }
    }
    public int MaxIterations {
      get { return MaxIterationsParameter.Value.Value; }
      set { MaxIterationsParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxGenParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxGenParameterName]; }
    }
    public int MaxGen {
      get { return MaxGenParameter.Value.Value; }
      set { MaxGenParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> EvalrCountParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[EvalrCountParameterName]; }
    }
    public int EvalrCount {
      get { return EvalrCountParameter.Value.Value; }
      set { EvalrCountParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxSizeParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxSizeParameterName]; }
    }
    public int MaxSize {
      get { return MaxSizeParameter.Value.Value; }
      set { MaxSizeParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MinSizeParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MinSizeParameterName]; }
    }
    public int MinSize {
      get { return MinSizeParameter.Value.Value; }
      set { MinSizeParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxDepthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDepthParameterName]; }
    }
    public int MaxDepth {
      get { return MaxDepthParameter.Value.Value; }
      set { MaxDepthParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MinDepthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MinDepthParameterName]; }
    }
    public int MinDepth {
      get { return MinDepthParameter.Value.Value; }
      set { MinDepthParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeRptEpochParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeRptEpochParameterName]; }
    }
    public int PgeRptEpoch {
      get { return PgeRptEpochParameter.Value.Value; }
      set { PgeRptEpochParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeRptCountParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeRptCountParameterName]; }
    }
    public int PgeRptCount {
      get { return PgeRptCountParameter.Value.Value; }
      set { PgeRptCountParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeArchiveCapParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeArchiveCapParameterName]; }
    }
    public int PgeArchiveCap {
      get { return PgeArchiveCapParameter.Value.Value; }
      set { PgeArchiveCapParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PeelCntParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PeelCntParameterName]; }
    }
    public int PeelCnt {
      get { return PeelCntParameter.Value.Value; }
      set { PeelCntParameter.Value.Value = value; }
    }

    private IFixedValueParameter<DoubleValue> ZeroEpsilonParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[ZeroEpsilonParameterName]; }
    }
    public double ZeroEpsilon {
      get { return ZeroEpsilonParameter.Value.Value; }
      set { ZeroEpsilonParameter.Value.Value = value; }
    }

    private IFixedValueParameter<DoubleValue> HitRatioParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[HitRatioParameterName]; }
    }
    public double HitRatio {
      get { return HitRatioParameter.Value.Value; }
      set { HitRatioParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> InitMethodParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[InitMethodParameterName]; }
    }
    public string InitMethod {
      get { return InitMethodParameter.Value.Value; }
      set { InitMethodParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> GrowMethodParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[GrowMethodParameterName]; }
    }
    public string GrowMethod {
      get { return GrowMethodParameter.Value.Value; }
      set { GrowMethodParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> RootsParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[RootsParameterName]; }
    }
    public string Roots {
      get { return RootsParameter.Value.Value; }
      set { RootsParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> NodesParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[NodesParameterName]; }
    }
    public string Nodes {
      get { return NodesParameter.Value.Value; }
      set { NodesParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> NonTrigParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[NonTrigParameterName]; }
    }
    public string NonTrig {
      get { return NonTrigParameter.Value.Value; }
      set { NonTrigParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> LeafsParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[LeafsParameterName]; }
    }
    public string Leafs {
      get { return LeafsParameter.Value.Value; }
      set { LeafsParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>
    /// Creates the algorithm with a fresh <see cref="RegressionProblem"/> and registers all
    /// parameters with their default values.
    /// </summary>
    public PGE() {
      base.Problem = new RegressionProblem();

      // algorithm parameters are shown in the GUI
      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(MinDepthParameterName, new IntValue(1)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxDepthParameterName, new IntValue(6)));
      Parameters.Add(new FixedValueParameter<IntValue>(MinSizeParameterName, new IntValue(4)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(EvalrCountParameterName, new IntValue(2)));
      Parameters.Add(new FixedValueParameter<IntValue>(PeelCntParameterName, new IntValue(3)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeArchiveCapParameterName, new IntValue(256)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptCountParameterName, new IntValue(20)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptEpochParameterName, new IntValue(1)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));
      Parameters.Add(new FixedValueParameter<StringValue>(InitMethodParameterName, new StringValue("method1"))); // TODO Dropdown
      Parameters.Add(new FixedValueParameter<StringValue>(GrowMethodParameterName, new StringValue("method1")));
      Parameters.Add(new FixedValueParameter<StringValue>(RootsParameterName, new StringValue("Add"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(NodesParameterName, new StringValue("Add Mul"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(NonTrigParameterName, new StringValue("Add Mul"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(LeafsParameterName, new StringValue("Var ConstantF")));
      Parameters.Add(new FixedValueParameter<DoubleValue>(ZeroEpsilonParameterName, new DoubleValue(0.00001)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(HitRatioParameterName, new DoubleValue(0.01)));
    }

    [StorableConstructor]
    public PGE(bool deserializing) : base(deserializing) { }

    public PGE(PGE original, Cloner cloner)
      : base(original, cloner) {
      // nothing to clone
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new PGE(this, cloner);
    }

    /// <summary>
    /// Runs the search: publishes the result objects, transfers data and configuration to the
    /// native library, then calls <see cref="StepW"/> up to <see cref="MaxIterations"/> times and
    /// turns every reported expression into a regression solution.
    /// </summary>
    /// <param name="cancellationToken">Checked once after each iteration; a pending request ends the loop.</param>
    /// <exception cref="NotSupportedException">An allowed input variable name contains a space.</exception>
    protected override void Run(CancellationToken cancellationToken) {
      Log log = new Log();
      Results.Add(new Result("Log", log));

      var iterationsResult = new IntValue(0);
      Results.Add(new Result("Iteration", iterationsResult));

      var bestMSEResult = new DoubleValue();
      Results.Add(new Result("Best MSE", bestMSEResult));

      var testScoresTable = new DataTable("Test scores");
      var bestTestScoreRow = new DataRow("Best test score");
      var curTestScoreRow = new DataRow("Current test score");
      testScoresTable.Rows.Add(bestTestScoreRow);
      testScoresTable.Rows.Add(curTestScoreRow);
      Results.Add(new Result("Test scores", testScoresTable));

      var lengthsTable = new DataTable("Lengths");
      var len1Row = new DataRow("Length 1");
      var len2Row = new DataRow("Length 2");
      lengthsTable.Rows.Add(len1Row);
      lengthsTable.Rows.Add(len2Row);
      Results.Add(new Result("Lengths", lengthsTable));

      var bestSolutionResult = new Result("Best solution", typeof(IRegressionSolution));
      Results.Add(bestSolutionResult);

      var allSolutions = new ItemList<IRegressionSolution>();
      var allSolutionsResult = new Result("Solutions", allSolutions);
      Results.Add(allSolutionsResult);

      // TODO: the following is potentially problematic for other go processes run on the same machine at the same time
      // shouldn't be problematic bc is inherited only, normally only child processes are affected
      Environment.SetEnvironmentVariable("GOGC", "off");
      Environment.SetEnvironmentVariable("GODEBUG", "cgocheck=0");
      Environment.SetEnvironmentVariable("CGO_ENABLED", "1");
      Environment.SetEnvironmentVariable("PGEDEBUG", "0");

      // Constants
      int sortType = 0; // TODO what's sort type?
      // 1 = PESORT_PARETO_TRN_ERR
      // 0 = PESORT_PARETO_TST_ERR
      string problemTypeString = "benchmark";
      int numProc = 12; // NOTE(review): hard-coded; confirm whether this should follow the machine's core count

      var problemData = Problem.ProblemData;

      // Materialize once: the names are needed for validation, marshalling and solution creation.
      string[] inputVariables = problemData.AllowedInputVariables.ToArray();
      if (inputVariables.Any(iv => iv.Contains(" ")))
        throw new NotSupportedException("PGE does not support variable names which contain spaces");

      // Column order of the data matrices: all input variables followed by the target variable.
      string[] variables = inputVariables.Concat(new string[] { problemData.TargetVariable }).ToArray();
      int[] trainRows = problemData.TrainingIndices.ToArray();
      int[] testRows = problemData.TestIndices.ToArray();

      double[] trainData = GetData(problemData.Dataset, variables, trainRows);
      double[] testData = GetData(problemData.Dataset, variables, testRows);

      // The row counts are taken from the rows that were actually copied so that they always
      // agree with the length of the marshalled buffers.
      int nTrainData = trainRows.Length;
      int nTestData = testRows.Length;

      var inputVariableNames = string.Join(" ", inputVariables);

      AddTestData(inputVariableNames, problemData.TargetVariable, testData, nTestData);
      AddTrainData(inputVariableNames, problemData.TargetVariable, trainData, nTrainData);

      int numberOfUseableVariables = inputVariables.Length;

      InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, InitMethod, GrowMethod, sortType);

      // cUsableVars: list of indices into independent variables
      InitTreeParams(Roots, Nodes, NonTrig, Leafs, numberOfUseableVariables, MaxSize, MinSize, MaxDepth, MinDepth);

      // NOTE(review): `Name` is the algorithm's name, not the problem data's name — confirm this is intended.
      InitProblem(Name, MaxIterations, HitRatio,
        searchVar: 0, // SearchVar: index of dependent variables (this is always zero because we only have one target variable)
        ProblemTypeString: problemTypeString,
        numProcs: numProc);

      var bestMSE = double.MaxValue;
      for (int iter = 1; iter <= MaxIterations; iter++) {
        iterationsResult.Value = iter;
        int nResults = StepW();

        for (int iResult = 0; iResult < nResults; iResult++) {
          int nCoeff = 0;
          int testScore = 0;

          IntPtr eqn = GetStepResult(out testScore, out nCoeff);
          string eqnStr = Marshal.PtrToStringAnsi(eqn);

          // The coefficients of the current result are fetched one call at a time.
          double[] coeff = new double[nCoeff];
          for (int iCoeff = 0; iCoeff < nCoeff; iCoeff++) {
            coeff[iCoeff] = GetCoeffResult();
          }

          log.LogMessage("Push/Pop (" + iResult + ", " + testScore + ") " + eqnStr + " coeff: " + string.Join(" ", coeff));

          if (!string.IsNullOrEmpty(eqnStr)) {
            var sol = CreateSolution(problemData, eqnStr, coeff, inputVariables);
            allSolutions.Add(sol);

            if (sol.TrainingMeanSquaredError < bestMSE) {
              // update best quality
              bestMSE = sol.TrainingMeanSquaredError;
              bestMSEResult.Value = bestMSE;
              bestSolutionResult.Value = sol;
            }
          }

          bestTestScoreRow.Values.Add(bestMSEResult.Value); // always add the current best test score to data row
          curTestScoreRow.Values.Add(testScore);
        }

        if (cancellationToken.IsCancellationRequested) break;
      }

      // Results.Add(new Result("Execution time", new TimeSpanValue(this.ExecutionTime)));
    }

    // The whole digit run is captured (\d+), so multi-digit indices such as "X_12" resolve to 12.
    private static readonly Regex varRegex = new Regex(@"X_(\d+)");
    private static readonly Regex coeffRegex = new Regex(@"C_(\d+)");

    /// <summary>
    /// Converts an expression string reported by the native library into a regression solution.
    /// </summary>
    /// <param name="problemData">Problem data; a clone of it is attached to the returned solution.</param>
    /// <param name="eqnStr">Expression in which coefficients appear as "C_i" and variables as "X_i".</param>
    /// <param name="coeff">Coefficient values, indexed by the number following "C_".</param>
    /// <param name="usableVariables">Variable names, indexed by the number following "X_".</param>
    /// <returns>A solution built from the parsed expression tree.</returns>
    private IRegressionSolution CreateSolution(IRegressionProblemData problemData, string eqnStr, double[] coeff, string[] usableVariables) {
      var invariant = System.Globalization.CultureInfo.InvariantCulture;

      // coefficients are named e.g. "C_0" in the PGE expressions
      // -> replace every "C_<n>" by the parenthesized value of coefficient n
      eqnStr = coeffRegex.Replace(eqnStr, m => {
        var coeffIdx = int.Parse(m.Groups[1].Value, invariant);
        return "(" + coeff[coeffIdx].ToString(invariant) + ")";
      });

      // variables are named e.g. "X_0" in the PGE expressions
      // -> replace every "X_<n>" by the quoted name of variable n.
      // Regex.Replace makes a single pass and never rescans inserted text, so a variable whose
      // own name looks like "X_0" cannot trigger endless re-substitution.
      eqnStr = varRegex.Replace(eqnStr, m => {
        var varIdx = int.Parse(m.Groups[1].Value, invariant);
        return "'" + usableVariables[varIdx] + "'";
      });

      var parser = new InfixExpressionParser();
      var tree = parser.Parse(eqnStr);
      var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
      return model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
    }

    public override bool SupportsPause {
      get { return false; }
    }

    /// <summary>
    /// Copies the selected rows and variables of a dataset into a flat array in which the value
    /// of variable c for the r-th selected row is stored at index r * (number of variables) + c.
    /// </summary>
    /// <param name="ds">Dataset to read from.</param>
    /// <param name="variableNames">Variables to copy; their order defines the column order.</param>
    /// <param name="rows">Dataset row indices to copy; their order defines the row order.</param>
    /// <returns>An array of length (number of rows) * (number of variables).</returns>
    private static double[] GetData(IDataset ds, IEnumerable<string> variableNames, IEnumerable<int> rows) {
      string[] names = variableNames.ToArray();
      int[] rowIndices = rows.ToArray();
      int dim = names.Length;

      double[] val = new double[rowIndices.Length * dim];
      for (int r = 0; r < rowIndices.Length; r++) {
        for (int c = 0; c < dim; c++) {
          // r is the position in the output, rowIndices[r] the row to read from the dataset.
          val[r * dim + c] = ds.GetDoubleValue(names[c], rowIndices[r]);
        }
      }
      return val;
    }
  }
}