using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace PGE {
  /// <summary>
  /// HeuristicLab algorithm that drives the PGE implementation exported by the native
  /// library "go-pge.dll" via P/Invoke. The algorithm works on a <see cref="RegressionProblem"/>,
  /// pushes the training/test data and all settings to the native side and then polls the
  /// native search step by step, storing the reported equations and coefficients as results.
  /// </summary>
  [Item(Name = "Priorizied Grammar Enumeration (PGE)", Description = "Priorizied grammar enumeration algorithm. Worm, T. and Chiu K., 'Prioritized Grammar Enumeration: Symbolic Regression by Dynamic Programming'. GECCO 2013")]
  [Creatable(Category = CreatableAttribute.Categories.Algorithms, Priority = 999)]
  [StorableClass]
  public unsafe class PGE : BasicAlgorithm {

    #region native interface (go-pge.dll)
    // All string arguments are passed as pointers to ANSI strings allocated by the caller.
    // NOTE(review): whether the native side copies these buffers or keeps the pointers is not
    // visible from this file; Run() keeps them alive until the search loop has finished.

    [DllImport("go-pge.dll", EntryPoint = "addTestData", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void AddTestData(IntPtr indepNames, IntPtr depndNames, IntPtr matrix, int nEntries);

    [DllImport("go-pge.dll", EntryPoint = "addTrainData", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void AddTrainData(IntPtr indepNames, IntPtr depndNames, IntPtr matrix, int nEntries);

    [DllImport("go-pge.dll", EntryPoint = "initSearch", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitSearch(int maxGen, int pgeRptEpoch, int pgeRptCount, int pgeArchiveCap, int peelCnt, int evalrCount, double zeroEpsilon, IntPtr initMethod, IntPtr growMethod, int sortType);

    [DllImport("go-pge.dll", EntryPoint = "initTreeParams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitTreeParams(IntPtr roots, IntPtr nodes, IntPtr nonTrig, IntPtr leafs, IntPtr usableVars, int numUsableVars, int maxSize, int minSize, int maxDepth, int minDepth);

    [DllImport("go-pge.dll", EntryPoint = "initProblem", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void InitProblem(IntPtr name, int maxIter, double hitRatio, int searchVar, IntPtr ProblemTypeString, int numProcs);

    [DllImport("go-pge.dll", EntryPoint = "stepW", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern void StepW();

    [DllImport("go-pge.dll", EntryPoint = "getStepResult", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr GetStepResult(out int noBestPush, out int bestNewMinErr, out int bestlen1, out int bestlen2, out int testscore, out int nCoeff);

    [DllImport("go-pge.dll", EntryPoint = "getCoeffResult", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
    public static extern double GetCoeffResult();
    #endregion

    /// <summary>The problem type this algorithm can be applied to.</summary>
    public override Type ProblemType { get { return typeof(RegressionProblem); } }

    /// <summary>The configured problem, typed as <see cref="RegressionProblem"/>.</summary>
    public new RegressionProblem Problem { get { return (RegressionProblem)base.Problem; } }

    #region parameter names
    private static readonly string MaxIterationsParameterName = "MaxIterations";
    private static readonly string MaxGenParameterName = "MaxGen";
    private static readonly string EvalrCountParameterName = "EvalrCount";
    private static readonly string MaxSizeParameterName = "MaxSize";
    private static readonly string MinSizeParameterName = "MinSize";
    private static readonly string MaxDepthParameterName = "MaxDepth";
    private static readonly string SearchVarParameterName = "SearchVar";
    private static readonly string MinDepthParameterName = "MinDepth";
    private static readonly string PgeRptEpochParameterName = "PgeRptEpoch";
    private static readonly string PgeRptCountParameterName = "PgeRptCount";
    private static readonly string PgeArchiveCapParameterName = "PgeArchiveCap";
    private static readonly string PeelCntParameterName = "PeelCnt";
    private static readonly string ZeroEpsilonParameterName = "ZeroEpsilon";
    private static readonly string HitRatioParameterName = "HitRatio";
    private static readonly string InitMethodParameterName = "InitMethod";
    private static readonly string GrowMethodParameterName = "GrowMethod";
    private static readonly string RootsParameterName = "Roots";
    private static readonly string NodesParameterName = "Nodes";
    private static readonly string NonTrigParameterName = "NonTrig";
    private static readonly string LeafsParameterName = "Leafs";
    #endregion

    #region parameters
    // Each setting is stored as a fixed-value parameter in the Parameters collection and
    // exposed through a plain CLR property that reads/writes the wrapped value.

    private IFixedValueParameter<IntValue> MaxIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxIterationsParameterName]; }
    }
    /// <summary>Number of native search steps executed by Run (also passed to initProblem).</summary>
    public int MaxIterations {
      get { return MaxIterationsParameter.Value.Value; }
      set { MaxIterationsParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxGenParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxGenParameterName]; }
    }
    /// <summary>Forwarded to initSearch as maxGen.</summary>
    public int MaxGen {
      get { return MaxGenParameter.Value.Value; }
      set { MaxGenParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> EvalrCountParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[EvalrCountParameterName]; }
    }
    /// <summary>Forwarded to initSearch as evalrCount.</summary>
    public int EvalrCount {
      get { return EvalrCountParameter.Value.Value; }
      set { EvalrCountParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxSizeParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxSizeParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as maxSize.</summary>
    public int MaxSize {
      get { return MaxSizeParameter.Value.Value; }
      set { MaxSizeParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MinSizeParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MinSizeParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as minSize.</summary>
    public int MinSize {
      get { return MinSizeParameter.Value.Value; }
      set { MinSizeParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MaxDepthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDepthParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as maxDepth.</summary>
    public int MaxDepth {
      get { return MaxDepthParameter.Value.Value; }
      set { MaxDepthParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> SearchVarParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[SearchVarParameterName]; }
    }
    /// <summary>Forwarded to initProblem as searchVar.</summary>
    public int SearchVar {
      get { return SearchVarParameter.Value.Value; }
      set { SearchVarParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> MinDepthParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[MinDepthParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as minDepth.</summary>
    public int MinDepth {
      get { return MinDepthParameter.Value.Value; }
      set { MinDepthParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeRptEpochParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeRptEpochParameterName]; }
    }
    /// <summary>Forwarded to initSearch as pgeRptEpoch.</summary>
    public int PgeRptEpoch {
      get { return PgeRptEpochParameter.Value.Value; }
      set { PgeRptEpochParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeRptCountParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeRptCountParameterName]; }
    }
    /// <summary>Forwarded to initSearch as pgeRptCount.</summary>
    public int PgeRptCount {
      get { return PgeRptCountParameter.Value.Value; }
      set { PgeRptCountParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PgeArchiveCapParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PgeArchiveCapParameterName]; }
    }
    /// <summary>Forwarded to initSearch as pgeArchiveCap.</summary>
    public int PgeArchiveCap {
      get { return PgeArchiveCapParameter.Value.Value; }
      set { PgeArchiveCapParameter.Value.Value = value; }
    }

    private IFixedValueParameter<IntValue> PeelCntParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[PeelCntParameterName]; }
    }
    /// <summary>Forwarded to initSearch as peelCnt; also the number of results polled per step.</summary>
    public int PeelCnt {
      get { return PeelCntParameter.Value.Value; }
      set { PeelCntParameter.Value.Value = value; }
    }

    private IFixedValueParameter<DoubleValue> ZeroEpsilonParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[ZeroEpsilonParameterName]; }
    }
    /// <summary>Forwarded to initSearch as zeroEpsilon.</summary>
    public double ZeroEpsilon {
      get { return ZeroEpsilonParameter.Value.Value; }
      set { ZeroEpsilonParameter.Value.Value = value; }
    }

    private IFixedValueParameter<DoubleValue> HitRatioParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[HitRatioParameterName]; }
    }
    /// <summary>Forwarded to initProblem as hitRatio.</summary>
    public double HitRatio {
      get { return HitRatioParameter.Value.Value; }
      set { HitRatioParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> InitMethodParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[InitMethodParameterName]; }
    }
    /// <summary>Forwarded to initSearch as initMethod.</summary>
    public string InitMethod {
      get { return InitMethodParameter.Value.Value; }
      set { InitMethodParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> GrowMethodParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[GrowMethodParameterName]; }
    }
    /// <summary>Forwarded to initSearch as growMethod.</summary>
    public string GrowMethod {
      get { return GrowMethodParameter.Value.Value; }
      set { GrowMethodParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> RootsParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[RootsParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as roots.</summary>
    public string Roots {
      get { return RootsParameter.Value.Value; }
      set { RootsParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> NodesParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[NodesParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as nodes.</summary>
    public string Nodes {
      get { return NodesParameter.Value.Value; }
      set { NodesParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> NonTrigParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[NonTrigParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as nonTrig.</summary>
    public string NonTrig {
      get { return NonTrigParameter.Value.Value; }
      set { NonTrigParameter.Value.Value = value; }
    }

    private IFixedValueParameter<StringValue> LeafsParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[LeafsParameterName]; }
    }
    /// <summary>Forwarded to initTreeParams as leafs.</summary>
    public string Leafs {
      get { return LeafsParameter.Value.Value; }
      set { LeafsParameter.Value.Value = value; }
    }
    #endregion

    /// <summary>
    /// Creates the algorithm with a fresh <see cref="RegressionProblem"/> and registers all
    /// parameters with their default values.
    /// </summary>
    public PGE() {
      base.Problem = new RegressionProblem();

      // algorithm parameters are shown in the GUI
      Parameters.Add(new FixedValueParameter<IntValue>(MaxIterationsParameterName, new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(SearchVarParameterName, new IntValue(0)));
      Parameters.Add(new FixedValueParameter<IntValue>(MinDepthParameterName, new IntValue(1)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxDepthParameterName, new IntValue(6)));
      Parameters.Add(new FixedValueParameter<IntValue>(MinSizeParameterName, new IntValue(4)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxSizeParameterName, new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(EvalrCountParameterName, new IntValue(2)));
      Parameters.Add(new FixedValueParameter<IntValue>(PeelCntParameterName, new IntValue(3)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeArchiveCapParameterName, new IntValue(256)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptCountParameterName, new IntValue(20)));
      Parameters.Add(new FixedValueParameter<IntValue>(PgeRptEpochParameterName, new IntValue(1)));
      Parameters.Add(new FixedValueParameter<IntValue>(MaxGenParameterName, new IntValue(200)));
      Parameters.Add(new FixedValueParameter<StringValue>(InitMethodParameterName, new StringValue("method1"))); // TODO Dropdown
      Parameters.Add(new FixedValueParameter<StringValue>(GrowMethodParameterName, new StringValue("method1")));
      Parameters.Add(new FixedValueParameter<StringValue>(RootsParameterName, new StringValue("Add"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(NodesParameterName, new StringValue("Add Mul"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(NonTrigParameterName, new StringValue("Add Mul"))); // TODO: checkeditemlist
      Parameters.Add(new FixedValueParameter<StringValue>(LeafsParameterName, new StringValue("Var ConstantF")));
      Parameters.Add(new FixedValueParameter<DoubleValue>(ZeroEpsilonParameterName, new DoubleValue(0.00001)));
      Parameters.Add(new FixedValueParameter<DoubleValue>(HitRatioParameterName, new DoubleValue(0.01)));
    }

    /// <summary>Constructor used by the persistence framework.</summary>
    [StorableConstructor]
    public PGE(bool deserializing) : base(deserializing) { }

    /// <summary>Cloning constructor; all state lives in the base class.</summary>
    public PGE(PGE original, Cloner cloner) : base(original, cloner) {
      // nothing to clone
    }

    /// <summary>Creates a deep clone of this algorithm via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new PGE(this, cloner);
    }

    /// <summary>
    /// Transfers data and settings to the native library, then executes up to
    /// <see cref="MaxIterations"/> native steps. After every step, <see cref="PeelCnt"/> results
    /// are polled; each reported equation is written to the console and added to Results together
    /// with its coefficients. Cancellation is checked once after every step.
    /// </summary>
    /// <param name="cancellationToken">Token used to stop the loop between native steps.</param>
    protected override void Run(CancellationToken cancellationToken) {
      // TODO: the following is potentially problematic for other go processes run on the same machine at the same time
      Environment.SetEnvironmentVariable("GOGC", "off");
      Environment.SetEnvironmentVariable("GODEBUG", "cgocheck=0");
      Environment.SetEnvironmentVariable("CGO_ENABLED", "1");

      //Constants
      int sortType = 0;
      string problemTypeString = "benchmark";
      int numProc = 12;

      var problemData = Problem.ProblemData;
      string problemName = problemData.Name;

      // Column order of the transferred matrices: all allowed inputs followed by the target.
      // Materialized once because the sequence is consumed for both partitions.
      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }).ToArray();

      // Every unmanaged buffer allocated below is registered here and released in the finally
      // block, so nothing leaks when a native call throws or the run is cancelled.
      var unmanagedBuffers = new List<IntPtr>();
      try {
        // no idea why the following are IntPtr, this should not be necessary for marshalling, it should be ok to just send the double[,]
        int nTrainData;
        int nTestData;
        IntPtr trainData = Own(unmanagedBuffers, GetData(problemData.Dataset, variables, problemData.TrainingIndices, out nTrainData));
        IntPtr testData = Own(unmanagedBuffers, GetData(problemData.Dataset, variables, problemData.TestIndices, out nTestData));

        var inputVariableNames = string.Join(" ", problemData.AllowedInputVariables); // TODO: does this work when input variables contain spaces?
        // is split on the go side, just for simpler passing
        IntPtr cIndepNames = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(inputVariableNames));
        IntPtr cDependentNames = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(problemData.TargetVariable)); // TODO: is it ok to use any variable here?
        // Dependent- and Independentnames are the variables from the test/train data, e.g. from "Korns_02.trn" dep: x y z v w indep: f(xs)

        IntPtr cInitMethod = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(InitMethod));
        IntPtr cGrowMethod = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(GrowMethod));
        IntPtr cRoots = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(Roots));
        IntPtr cNodes = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(Nodes));
        IntPtr cNonTrig = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(NonTrig));
        IntPtr cLeafs = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(Leafs));
        IntPtr cName = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(problemName));
        IntPtr cProblemTypeString = Own(unmanagedBuffers, Marshal.StringToHGlobalAnsi(problemTypeString));

        AddTestData(cIndepNames, cDependentNames, testData, nTestData);
        AddTrainData(cIndepNames, cDependentNames, trainData, nTrainData);

        int numberOfUseableVariables = problemData.AllowedInputVariables.Count();
        IntPtr cUseableVars = Own(unmanagedBuffers, GetUsableVars(numberOfUseableVariables));

        InitSearch(MaxGen, PgeRptEpoch, PgeRptCount, PgeArchiveCap, PeelCnt, EvalrCount, ZeroEpsilon, cInitMethod, cGrowMethod, sortType);
        // cUsableVars: list of indices into dependent variables
        InitTreeParams(cRoots, cNodes, cNonTrig, cLeafs, cUseableVars, numberOfUseableVariables, MaxSize, MinSize, MaxDepth, MinDepth);
        // SearchVar: list of indices into independent variables (0 for first index)
        InitProblem(cName, MaxIterations, HitRatio, SearchVar, cProblemTypeString, numProc);

        for (int iter = 1; iter <= MaxIterations; iter++) {
          StepW(); // TODO: alg crashes here

          for (int iPeel = 0; iPeel < PeelCnt; iPeel++) {
            int nobestpush = 0; //bool
            int bestNewMinError = 0; //bool
            int bestlen1 = 0;
            int bestlen2 = 0;
            int nCoeff = 0;
            int testScore = 0;

            // NOTE(review): the returned string pointer is not released here; confirm which
            // side owns that buffer.
            IntPtr eqn = GetStepResult(out nobestpush, out bestNewMinError, out bestlen1, out bestlen2, out testScore, out nCoeff);
            string eqnStr = Marshal.PtrToStringAnsi(eqn);

            if (nobestpush == 1) {
              Console.WriteLine("No best push");
              continue;
            }

            Console.WriteLine("Push/Pop (" + bestlen1 + "," + bestlen2 + ") " + eqnStr);

            // The coefficients are fetched one at a time; nCoeff says how many to read.
            StringBuilder sb = new StringBuilder("");
            for (int iCoeff = 0; iCoeff < nCoeff; iCoeff++) {
              double coeffVal = GetCoeffResult();
              Console.WriteLine("Coeff: " + coeffVal);
              sb.Append(coeffVal + "; ");
            }

            // A separate IntValue per result: sharing one mutable item would make every
            // "Iteration ..." entry display the number of the last executed iteration.
            Results.Add(new Result("Iteration " + iter + " " + iPeel, new IntValue(iter)));
            Results.Add(new Result("Coeff " + iter + " " + iPeel, new StringValue(sb.ToString())));
            Results.Add(new Result("Best quality " + iter + " " + iPeel, new StringValue(eqnStr)));
          }

          if (cancellationToken.IsCancellationRequested) break;
        }
      } finally {
        foreach (IntPtr buffer in unmanagedBuffers) {
          Marshal.FreeHGlobal(buffer);
        }
      }
      // Results.Add(new Result("Execution time", new TimeSpanValue(this.ExecutionTime)));
    }

    /// <summary>Pausing is not supported.</summary>
    public override bool SupportsPause {
      get { return false; }
    }

    /// <summary>Registers an unmanaged buffer for later release and returns it unchanged.</summary>
    private static IntPtr Own(ICollection<IntPtr> owned, IntPtr buffer) {
      owned.Add(buffer);
      return buffer;
    }

    /// <summary>
    /// Allocates an unmanaged array of <paramref name="n"/> 64-bit integers holding 0..n-1.
    /// The caller must release the buffer with Marshal.FreeHGlobal.
    /// </summary>
    private static IntPtr GetUsableVars(int n) {
      long[] vars = new long[n];
      for (int i = 0; i < n; i++) {
        vars[i] = i;
      }

      IntPtr usableVars = Marshal.AllocHGlobal(sizeof(long) * n);
      Marshal.Copy(vars, 0, usableVars, n);
      return usableVars;
    }

    /// <summary>
    /// Copies the selected rows and variables of a dataset into an unmanaged, row-major
    /// array of doubles (one block of <c>variableNames</c> values per row).
    /// The caller must release the buffer with Marshal.FreeHGlobal.
    /// </summary>
    /// <param name="ds">Dataset to read the values from.</param>
    /// <param name="variableNames">Variables to export; defines the column order.</param>
    /// <param name="rows">Dataset row indices to export; defines the row order.</param>
    /// <param name="n">Receives the total number of doubles written (rows * variables).</param>
    /// <returns>Pointer to the unmanaged copy of the data.</returns>
    private static IntPtr GetData(IDataset ds, IEnumerable<string> variableNames, IEnumerable<int> rows, out int n) {
      // Materialize both sequences once; they may be lazy and are needed for sizing and filling.
      string[] names = variableNames.ToArray();
      int[] rowIndices = rows.ToArray();
      int dim = names.Length;

      double[] val = new double[rowIndices.Length * dim];
      for (int r = 0; r < rowIndices.Length; r++) {
        for (int c = 0; c < dim; c++) {
          // Read from the actual dataset row, not from the running position r; otherwise
          // any partition that does not start at row 0 would export the wrong rows.
          val[r * dim + c] = ds.GetDoubleValue(names[c], rowIndices[r]);
        }
      }

      n = val.Length;

      // TODO: seems strange to marshal this explicitly, we can just send the data over to PGE
      IntPtr data = Marshal.AllocHGlobal(sizeof(double) * val.Length);
      Marshal.Copy(val, 0, data, val.Length);
      return data;
    }
  }
}