using HEAL.Attic;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Random;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using CancellationToken = System.Threading.CancellationToken;
using ExecutionContext = HeuristicLab.Core.ExecutionContext;

namespace HeuristicLab.Algorithms.EvolvmentModelsOfModels {
  /// <summary>
  /// Prepares a set of symbolic models: reads one infix expression per line from the input file,
  /// computes (or deserializes) a pairwise distance matrix between the models and lets the
  /// selected map implementation build a map from that matrix.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the generic type arguments in this file (parameter interfaces, the base class,
  /// <c>EMMMapBase</c>, collections) were reconstructed from how the values are used in this class.
  /// Please confirm them against the declarations of <c>EMMMapBase</c> and the concrete map types.
  /// </remarks>
  [Item("ModelSetPreparation", "Model Set preparation algorithm.")]
  [Creatable(CreatableAttribute.Categories.Algorithms, Priority = 125)]
  [StorableType("3C5DF308-DB79-4ACD-894B-F795F081726B")]
  public class ModelSetPreparation : FixedDataAnalysisAlgorithm<ISymbolicDataAnalysisSingleObjectiveProblem> {
    #region data members
    [Storable]
    protected ExecutionContext executionContext;
    [Storable]
    protected ExecutionState previousExecutionState;
    // Always assigned a materialized array by this class, so repeated enumeration is cheap and
    // yields the same tree instances every time.
    [Storable]
    protected IEnumerable<ISymbolicExpressionTree> trees;
    [Storable]
    protected ExecutionState currentExecutionState;
    #endregion

    #region parameters
    private const string SeedParameterName = "Seed";
    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    private const string RandomParameterName = "Random";
    private const string InputFileParameterName = "InputFile";
    private const string AlgorithmImplementationTypeParameterName = "AlgorithmImplementationType";
    private const string GoalParameterName = "Goal";
    private const string DistanceTypeParameterName = "DistanceType";
    private const string MapParameterName = "Map";

    public IFixedValueParameter<IntValue> SeedParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> AlgorithmImplementationTypeParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[AlgorithmImplementationTypeParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> GoalParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[GoalParameterName]; }
    }
    public IConstrainedValueParameter<StringValue> DistanceTypeParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters[DistanceTypeParameterName]; }
    }
    public IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>> MapParameter {
      get { return (IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>)Parameters[MapParameterName]; }
    }
    public IFixedValueParameter<StringValue> InputFileParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters[InputFileParameterName]; }
    }
    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter {
      get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; }
    }
    public IValueParameter<IRandom> RandomParameter {
      get { return (IValueParameter<IRandom>)Parameters[RandomParameterName]; }
    }
    #endregion

    #region parameter properties
    public int Seed {
      get { return SeedParameter.Value.Value; }
      set { SeedParameter.Value.Value = value; }
    }
    /// <summary>Selected implementation type; the setter selects the valid value with the same text.</summary>
    public StringValue AlgorithmImplemetationType {
      get { return AlgorithmImplementationTypeParameter.Value; }
      set { SelectValidValue(AlgorithmImplementationTypeParameter, value); }
    }
    /// <summary>Selected goal; the setter selects the valid value with the same text.</summary>
    public StringValue Goal {
      get { return GoalParameter.Value; }
      set { SelectValidValue(GoalParameter, value); }
    }
    /// <summary>Selected distance type; the setter selects the valid value with the same text.</summary>
    public StringValue DistanceType {
      get { return DistanceTypeParameter.Value; }
      set { SelectValidValue(DistanceTypeParameter, value); }
    }
    public EMMMapBase<ISymbolicExpressionTree> Map {
      get { return MapParameter.Value; }
      set { MapParameter.Value = value; }
    }
    public StringValue InputFile {
      get { return InputFileParameter.Value; }
      set { InputFileParameter.Value.Value = value.Value; }
    }
    public bool SetSeedRandomly {
      get { return SetSeedRandomlyParameter.Value.Value; }
      set { SetSeedRandomlyParameter.Value.Value = value; }
    }

    /// <summary>
    /// Selects the entry of <paramref name="parameter"/>'s valid values whose text equals
    /// <paramref name="requested"/>. The previous setters overwrote the text of the currently
    /// selected entry instead, which changed the set of valid values rather than the selection.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="requested"/> is null.</exception>
    /// <exception cref="ArgumentException">No valid value has the requested text.</exception>
    private static void SelectValidValue(IConstrainedValueParameter<StringValue> parameter, StringValue requested) {
      if (requested == null) {
        throw new ArgumentNullException(nameof(requested));
      }
      var match = parameter.ValidValues.FirstOrDefault(candidate => candidate.Value == requested.Value);
      if (match == null) {
        throw new ArgumentException("'" + requested.Value + "' is not a valid value for parameter '" + parameter.Name + "'.", nameof(requested));
      }
      parameter.Value = match;
    }
    #endregion

    #region constructors
    /// <summary>
    /// Creates the algorithm with its default parameters, loads the model set from the default
    /// input file (if it exists) and fills the constrained parameters with their valid values.
    /// </summary>
    public ModelSetPreparation() {
      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<StringValue>(InputFileParameterName, "The file with set of models that will be.", new StringValue("input.txt")));
      // Description lists the values that MapParameterUpdate actually registers.
      Parameters.Add(new ConstrainedValueParameter<StringValue>(AlgorithmImplementationTypeParameterName, "The Type of possible algorithm implementation, choose one: DistanceCalculation, OnlyMap, Statistic."));
      Parameters.Add(new ConstrainedValueParameter<StringValue>(GoalParameterName, "The goal of algorithm implementation, choose one: ToSee, ToWork, Full."));
      Parameters.Add(new ConstrainedValueParameter<StringValue>(DistanceTypeParameterName, "The Type of possible distance calculator for case of only distance calculation."));
      Parameters.Add(new ConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName, "The type of map creation algorithm. Use one from: IslandMap, NetworkMap."));
      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
      Parameters.Add(new ValueParameter<IRandom>(RandomParameterName, new MersenneTwister()));

      RegisterEventHandlers();
      // Hack kept from the original: the model set is read directly from a text file.
      trees = LoadTrees(InputFileParameter.Value.Value);
      MapParameterUpdate();
    }

    /// <summary>Cloning constructor; copies state and re-attaches the event handlers on the clone.</summary>
    protected ModelSetPreparation(ModelSetPreparation original, Cloner cloner) : base(original, cloner) {
      previousExecutionState = original.previousExecutionState;
      currentExecutionState = original.currentExecutionState;
      if (original.executionContext != null) {
        executionContext = cloner.Clone(original.executionContext);
      }
      // hack: the model set is cloned eagerly; tolerate an original without a loaded set.
      trees = original.trees?.Select(x => cloner.Clone(x)).ToArray();
      // Handlers are attached in the default constructor only, so the clone needs its own.
      RegisterEventHandlers();
    }

    [StorableConstructor]
    protected ModelSetPreparation(StorableConstructorFlag _) : base(_) { }

    /// <summary>Re-attaches the event handlers, which are not part of the stored state.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ModelSetPreparation(this, cloner);
    }

    /// <summary>Subscribes to input-file and problem changes of this instance.</summary>
    private void RegisterEventHandlers() {
      InputFile.ValueChanged += InputFile_ValueChanged;
      ProblemChanged += ModelSetPreporation_ProblemChanged;
    }

    /// <summary>
    /// Parses one infix expression per line of <paramref name="path"/> into a tree.
    /// Returns an empty array when the path is blank or the file does not exist, so that the
    /// algorithm can be constructed and re-configured without the file being present.
    /// </summary>
    private static ISymbolicExpressionTree[] LoadTrees(string path) {
      if (string.IsNullOrWhiteSpace(path) || !File.Exists(path)) {
        return new ISymbolicExpressionTree[0];
      }
      InfixExpressionParser parser = new InfixExpressionParser();
      // ToArray materializes the query; a deferred Select would re-parse on every enumeration.
      return File.ReadAllLines(path).Select(parser.Parse).ToArray();
    }

    // also hack: reloads the model set whenever the input file name changes.
    private void InputFile_ValueChanged(object sender, EventArgs e) {
      trees = LoadTrees(InputFileParameter.Value.Value);
    }

    // Replaces the interpreter of every newly assigned problem with the batch interpreter.
    private void ModelSetPreporation_ProblemChanged(object sender, EventArgs e) {
      if (Problem != null) {
        Problem.SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeBatchInterpreter();
      }
    }

    /// <summary>
    /// Registers the valid values of the Map, AlgorithmImplementationType, Goal and DistanceType
    /// parameters. Each call adds new instances, so it is meant to run once per algorithm instance.
    /// </summary>
    protected void MapParameterUpdate() {
      var mapTypes = new EMMMapBase<ISymbolicExpressionTree>[] {
        new EMMZeroMap(),
        new EMMIslandMap(),
        new EMMNetworkMap(),
        new EMMDisatanceMap(),
        new EMMRankMap(),
        new EMMSucsessMap()
      };
      foreach (var t in mapTypes) {
        MapParameter.ValidValues.Add(t);
      }

      var algorithmType = new StringValue[] {
        new StringValue("DistanceCalculation"),
        new StringValue("OnlyMap"),
        new StringValue("Statistic")
      };
      foreach (var t in algorithmType) {
        AlgorithmImplementationTypeParameter.ValidValues.Add(t);
      }

      var goal = new StringValue[] {
        new StringValue("ToWork"),
        new StringValue("ToSee"),
        new StringValue("Full")
      };
      foreach (var t in goal) {
        GoalParameter.ValidValues.Add(t);
      }

      var distanceType = new StringValue[] {
        new StringValue("MSE"),
        new StringValue("PearsonsRSquared"),
        new StringValue("Covariance"),
        new StringValue("MaxAbsoluteError"),
        new StringValue("MeanAbsoluteError"),
        new StringValue("Symbolic")
      };
      foreach (var t in distanceType) {
        DistanceTypeParameter.ValidValues.Add(t);
      }
    }
    #endregion

    #region technical stuff
    public override void Prepare() {
      base.Prepare();
    }

    /// <summary>
    /// Applies the Seed / SetSeedRandomly parameters to the random number generator before a run.
    /// </summary>
    protected override void Initialize(CancellationToken cancellationToken) {
      base.Initialize(cancellationToken);
      if (SetSeedRandomly) {
        Seed = new System.Random().Next();
      }
      RandomParameter.Value.Reset(Seed);
    }

    public override bool SupportsPause => true;

    // NOTE(review): the original had a comment here about random number generation for the
    // Dirichlet distribution (https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation);
    // no such code exists in this class.

    #region operator wiring and events
    // Points a stochastic operator at the "Random" parameter and hides that parameter.
    private void ParameterizeStochasticOperator(IOperator op) {
      if (op is IStochasticOperator stochasticOp) {
        stochasticOp.RandomParameter.ActualName = "Random";
        stochasticOp.RandomParameter.Hidden = true;
      }
    }

    /// <summary>
    /// Executes <paramref name="operation"/> and every follow-up operation it produces, using an
    /// explicit stack. Collections are pushed in reverse so their items run in original order.
    /// </summary>
    /// <param name="executionContext">Not used by this method.</param>
    /// <param name="cancellationToken">Checked before each operation is processed.</param>
    /// <param name="operation">The first operation to execute.</param>
    protected void ExecuteOperation(ExecutionContext executionContext, CancellationToken cancellationToken, IOperation operation) {
      Stack<IOperation> executionStack = new Stack<IOperation>();
      executionStack.Push(operation);
      while (executionStack.Count > 0) {
        cancellationToken.ThrowIfCancellationRequested();
        IOperation next = executionStack.Pop();
        if (next is OperationCollection coll) {
          for (int i = coll.Count - 1; i >= 0; i--) {
            if (coll[i] != null) {
              executionStack.Push(coll[i]);
            }
          }
        } else if (next is IAtomicOperation op) {
          next = op.Operator.Execute((IExecutionContext)op, cancellationToken);
          if (next != null) {
            executionStack.Push(next);
          }
        }
      }
    }

    protected override void OnProblemChanged() {
      base.OnProblemChanged();
    }

    // Remembers the state before the change in previousExecutionState.
    protected override void OnExecutionStateChanged() {
      previousExecutionState = currentExecutionState;
      currentExecutionState = ExecutionState;
      base.OnExecutionStateChanged();
    }

    // Clears the sub-scopes of the stored execution context, if there is one.
    protected override void OnStopped() {
      executionContext?.Scope?.SubScopes?.Clear();
      base.OnStopped();
    }
    #endregion
    #endregion

    #region algorithm implementation
    /// <summary>
    /// Computes or deserializes the distance matrix of the model set and builds the map.
    /// For "DistanceCalculation" the matrix is computed and, depending on Goal, printed and/or
    /// serialized; otherwise it is deserialized from the file named after the distance type.
    /// For "Statistic" the map is created ten times; otherwise it is created once and written out.
    /// </summary>
    /// <exception cref="InvalidOperationException">No models are loaded.</exception>
    protected override void Run(CancellationToken cancellationToken) {
      var models = trees == null ? new List<ISymbolicExpressionTree>() : trees.ToList();
      if (models.Count == 0) {
        throw new InvalidOperationException("No models are loaded. Check that the input file '" + InputFile.Value + "' exists and contains one expression per line.");
      }

      Map.DistanceParametr = DistanceType.Value;
      // distance calculation or reading that should be done in any cases
      string fileNameForWatch = "DistanceMatrix_Watch" + DistanceType + ".txt";
      string fileName = "DistanceMatrix_" + DistanceType + ".txt";
      double[,] totalDistance;

      if (AlgorithmImplemetationType.Value == "DistanceCalculation") {
        totalDistance = TotalDistanceMatrixCalculation(RandomParameter.Value, Problem, models, DistanceType.Value);
        if (Goal.Value != "ToWork") {
          FileComuncations.DoubleMatrixPrint(fileNameForWatch, totalDistance, models.Count);
        }
        if (Goal.Value != "ToSee") {
          FileComuncations.DoubleMatrixSerialize(fileName, totalDistance);
        }
      } else {
        totalDistance = FileComuncations.DoubleMatrixDeserialize(fileName);
      }

      if (AlgorithmImplemetationType.Value == "Statistic") {
        // New arrays are zero-initialized, so no explicit clearing is needed.
        var statistic = new int[models.Count, models.Count];
        const int repetitionNumber = 10;
        Map.MapCreationPrepare(trees);
        for (int i = 0; i < repetitionNumber; i++) {
          cancellationToken.ThrowIfCancellationRequested();
          Map.CreateMap(RandomParameter.Value, totalDistance);
          // NOTE(review): the original also kept Map.Map in a local list that was never read;
          // since Map.Map is cleared right below, a copy is needed if maps are to be retained.
          CheckClusters(statistic);
          Map.Map.Clear();
        }
      } else {
        // Simple map creation case
        Map.MapCreationPrepare(trees);
        Map.CreateMap(RandomParameter.Value, totalDistance);
        // This should be deactivated in case of using HIVE. HIVE can not work with it.
        Map.WriteMapToTxtFile(RandomParameter.Value);
      }
    }

    protected void CheckClusters(int[,] info) {
      // ToDo: It should be realized for statistics collection
    }

    #region distance manipulation
    // Signature shared by the pairwise calculators; List<double> arguments keep the same overload
    // resolution as direct calls with the evaluated tree outputs.
    private delegate double PairwiseCalculator(List<double> first, List<double> second, out OnlineCalculatorError errorState);

    /// <summary>
    /// Maps a distance type name to its online calculator. "MSE" uses the normalized mean
    /// squared error calculator and "PearsonsRSquared" uses the Pearson's R calculator.
    /// </summary>
    /// <exception cref="ArgumentException">The name is not one of the supported output-based types.</exception>
    private static PairwiseCalculator SelectCalculator(string distanceType) {
      switch (distanceType) {
        case "MSE":
          return (List<double> a, List<double> b, out OnlineCalculatorError e) => OnlineNormalizedMeanSquaredErrorCalculator.Calculate(a, b, out e);
        case "PearsonsRSquared":
          return (List<double> a, List<double> b, out OnlineCalculatorError e) => OnlinePearsonsRCalculator.Calculate(a, b, out e);
        case "Covariance":
          return (List<double> a, List<double> b, out OnlineCalculatorError e) => OnlineCovarianceCalculator.Calculate(a, b, out e);
        case "MaxAbsoluteError":
          return (List<double> a, List<double> b, out OnlineCalculatorError e) => OnlineMaxAbsoluteErrorCalculator.Calculate(a, b, out e);
        case "MeanAbsoluteError":
          return (List<double> a, List<double> b, out OnlineCalculatorError e) => OnlineMeanAbsoluteErrorCalculator.Calculate(a, b, out e);
        default:
          throw new ArgumentException("Unknown distance type '" + distanceType + "'.", nameof(distanceType));
      }
    }

    /// <summary>
    /// Computes the symmetric pairwise distance matrix of <paramref name="trees"/>.
    /// For "Symbolic" the result of <see cref="CalculateDistances"/> is returned; for every other
    /// type each tree is evaluated on the training rows of the problem and the outputs are
    /// compared pairwise. The diagonal is left at zero for the output-based types.
    /// </summary>
    /// <param name="trees">The models to compare.</param>
    /// <param name="distanceType">One of MSE, PearsonsRSquared, Covariance, MaxAbsoluteError, MeanAbsoluteError, Symbolic.</param>
    /// <param name="Problem">Supplies the regression data and the interpreter; not read for "Symbolic".</param>
    /// <exception cref="ArgumentException"><paramref name="distanceType"/> is not supported.</exception>
    public static double[,] DistanceMatrixCalculation(List<ISymbolicExpressionTree> trees, string distanceType, ISymbolicDataAnalysisSingleObjectiveProblem Problem) {
      if (distanceType == "Symbolic") {
        return CalculateDistances(trees);
      }

      // Resolve the calculator first so an unsupported name fails before any tree is evaluated.
      PairwiseCalculator calculator = SelectCalculator(distanceType);

      var problemData = (IRegressionProblemData)Problem.ProblemData;
      var dataset = problemData.Dataset;
      var rows = problemData.TrainingIndices;
      var interpreter = Problem.SymbolicExpressionTreeInterpreter;

      var treeValues = new List<List<double>>(trees.Count);
      foreach (var tree in trees) {
        treeValues.Add(interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows).ToList());
      }

      double[,] distances = new double[trees.Count, trees.Count];
      // The calculator's error state is not inspected, as in the original implementation.
      OnlineCalculatorError err;
      for (int i = 0; i < trees.Count - 1; i++) {
        for (int j = i + 1; j < trees.Count; j++) {
          distances[j, i] = distances[i, j] = calculator(treeValues[i], treeValues[j], out err);
        }
      }
      return distances;
    }

    /// <summary>
    /// Computes structural distances as one minus the similarity returned by
    /// <c>SymbolicExpressionTreeHash.ComputeSimilarityMatrix</c> and mirrors the upper triangle
    /// into the lower one. Diagonal entries are returned as delivered by the similarity matrix.
    /// </summary>
    public static double[,] CalculateDistances(List<ISymbolicExpressionTree> treesSet) {
      double[,] distances = SymbolicExpressionTreeHash.ComputeSimilarityMatrix(treesSet, simplify: false, strict: true);
      for (int i = 0; i < treesSet.Count - 1; i++) {
        for (int j = i + 1; j < treesSet.Count; j++) {
          distances[j, i] = distances[i, j] = 1 - distances[i, j];
        }
      }
      return distances;
    }

    /// <summary>
    /// Computes the distance matrix on clones of <paramref name="treesSet"/>, so the given trees
    /// are not modified. For output-based distance types the clones get new local parameters ten
    /// times and the absolute distances are averaged; for "Symbolic" the local parameters are set
    /// once and the structural distances are returned.
    /// </summary>
    public static double[,] TotalDistanceMatrixCalculation(IRandom random, ISymbolicDataAnalysisSingleObjectiveProblem problem, List<ISymbolicExpressionTree> treesSet, string distanceType) {
      int setSize = treesSet.Count;
      var treeSetTemp = new List<ISymbolicExpressionTree>(setSize);
      foreach (var tree in treesSet) {
        treeSetTemp.Add((ISymbolicExpressionTree)tree.Clone());
      }

      if (distanceType == "Symbolic") {
        foreach (var tree in treeSetTemp) {
          HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
        }
        return CalculateDistances(treeSetTemp);
      }

      const int repitNumber = 10;
      // New arrays are zero-initialized, which is the starting value of the running average.
      var totalDistance = new double[setSize, setSize];
      for (int i = 0; i < repitNumber; i++) {
        foreach (var tree in treeSetTemp) {
          HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
        }
        var distanceMatrix = DistanceMatrixCalculation(treeSetTemp, distanceType, problem);
        for (int t = 0; t < setSize; t++) {
          for (int j = 0; j < setSize; j++) {
            totalDistance[t, j] += Math.Abs(distanceMatrix[t, j]) / repitNumber;
          }
        }
      }
      return totalDistance;
    }
    #endregion
    #endregion
  }
}