Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2988_ModelsOfModels2/HeuristicLab.Algorithms.EMM/ModelSetPreporation.cs @ 17877

Last change on this file since 17877 was 17134, checked in by msemenki, 5 years ago

#2988:

  1. The file structure was changed: folders were added and some of the files were moved into these folders.
  2. The HelpFunctions class was split into 2 parts: HelpFunctions, which holds general-purpose static functions, and SelfConfiguration, which contains the functions that implement the self-configuration mechanism (used in EMMSucsessMap).
  3. Parts of the self-configuration mechanism were moved from EMMSucsessMap.cs to SelfConfiguration.cs. EMMSucsessMap now uses SelfConfiguration as one of its data members. Other parts of the project were adapted to this change.
  4. The FileComunication class was added. It contains most of the functions for writing to files and reading from files. It also implements the ability to write and read hl files.
  5. ModelTreeNode.cs has an additional capability: it can write a sub-model to a string (which can then be written to a file).
  6. InfixExpressionFormatter.cs can work with TreeModelNode.
  7. The ability to read different map types from files was extended and checked.
  8. Parameters such as ClusterNumbers, ClusterNumbersShow, NegbourNumber and NegbourType (which are used only in some maps) were moved from EMMAlgorithm to the map parameters. The EMMMapBase class now inherits from ParameterizedNamedItem (instead of Item), and EMMIslandMap and EMMNetworkMap contain their own parameters (their constructors were modified). The calls to the CreationMap functions were simplified.
  9. Functions for calculating different distance metrics were added. It is now possible to calculate different types of distances between models (with different random values of the constants).
  10. DistanceParametr was added. Maps can now be created according to different types of distance calculation.
  11. The EMMClustering class was renamed to KMeansClusterizationAlgorithm. In KMeansClusterizationAlgorithm, a bug that bloated the list of centroids was fixed. The algorithm was adapted to work with different types of distance metrics and to accept a maximum number of iterations.
  12. The ability to optimize constants in sub-models and in the whole tree was added. EMMAlgorithm got a new function for evaluating individuals (and some additional technical code for that). A function for converting trees that contain models into ordinary trees and back was added.
  13. EMMAlgorithm was split into 2 parts:
  • EMMAlgorithm, which contains the evolutionary algorithm that works with sub-models and uses ready-made maps;
  • ModelSetPreparation, which contains the distance calculation, model set simplification and map creation.
File size: 17.6 KB
Line 
1using HEAL.Attic;
2using HeuristicLab.Algorithms.DataAnalysis;
3using HeuristicLab.Common;
4using HeuristicLab.Core;
5using HeuristicLab.Data;
6using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
7using HeuristicLab.Optimization;
8using HeuristicLab.Parameters;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.DataAnalysis.Symbolic;
11using HeuristicLab.Random;
12using System;
13using System.Collections.Generic;
14using System.IO;
15using System.Linq;
16using CancellationToken = System.Threading.CancellationToken;
17using ExecutionContext = HeuristicLab.Core.ExecutionContext;
18
19namespace HeuristicLab.Algorithms.EvolvmentModelsOfModels {
20  [Item("ModelSetPreparation", "Model Set preparation algorithm.")]
21  [Creatable(CreatableAttribute.Categories.Algorithms, Priority = 125)]
22  [StorableType("3C5DF308-DB79-4ACD-894B-F795F081726B")]
23  public class ModelSetPreparation : FixedDataAnalysisAlgorithm<ISymbolicDataAnalysisSingleObjectiveProblem> {
24    #region data members
25    [Storable]
26    protected ExecutionContext executionContext;
27    [Storable]
28    protected ExecutionState previousExecutionState;
29    [Storable]
30    protected IEnumerable<ISymbolicExpressionTree> trees;
31    [Storable]
32    protected ExecutionState currentExecutionState;
33    #endregion
34
35    #region parameters
36    private const string SeedParameterName = "Seed";
37    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
38    private const string RandomParameterName = "Random";
39    private const string InputFileParameterName = "InputFile";
40    private const string AlgorithmImplementationTypeParameterName = "AlgorithmImplementationType";
41    private const string GoalParameterName = "Goal";
42    private const string DistanceTypeParameterName = "DistanceType";
43    private const string MapParameterName = "Map";
44
45    public IFixedValueParameter<IntValue> SeedParameter {
46      get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; }
47    }
48    public IConstrainedValueParameter<StringValue> AlgorithmImplementationTypeParameter {
49      get { return (IConstrainedValueParameter<StringValue>)Parameters[AlgorithmImplementationTypeParameterName]; }
50    }
51    public IConstrainedValueParameter<StringValue> GoalParameter {
52      get { return (IConstrainedValueParameter<StringValue>)Parameters[GoalParameterName]; }
53    }
54    public IConstrainedValueParameter<StringValue> DistanceTypeParameter {
55      get { return (IConstrainedValueParameter<StringValue>)Parameters[DistanceTypeParameterName]; }
56    }
57    public IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>> MapParameter {
58      get { return (IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>)Parameters[MapParameterName]; }
59    }
60    public IFixedValueParameter<StringValue> InputFileParameter {
61      get { return (IFixedValueParameter<StringValue>)Parameters[InputFileParameterName]; }
62    }
63    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter {
64      get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; }
65    }
66    public IValueParameter<IRandom> RandomParameter {
67      get { return (IValueParameter<IRandom>)Parameters[RandomParameterName]; }
68    }
69    #endregion
70
71    #region parameter properties
72    public int Seed {
73      get { return SeedParameter.Value.Value; }
74      set { SeedParameter.Value.Value = value; }
75    }
76    public StringValue AlgorithmImplemetationType {
77      get { return AlgorithmImplementationTypeParameter.Value; }
78      set { AlgorithmImplementationTypeParameter.Value.Value = value.Value; }
79    }
80    public StringValue Goal {
81      get { return GoalParameter.Value; }
82      set { GoalParameter.Value.Value = value.Value; }
83    }
84    public StringValue DistanceType {
85      get { return DistanceTypeParameter.Value; }
86      set { DistanceTypeParameter.Value.Value = value.Value; }
87    }
88    public EMMMapBase<ISymbolicExpressionTree> Map {
89      get { return MapParameter.Value; }
90      set { MapParameter.Value = value; }
91    }
92    public StringValue InputFile {
93      get { return InputFileParameter.Value; }
94      set { InputFileParameter.Value.Value = value.Value; }
95    }
96    public bool SetSeedRandomly {
97      get { return SetSeedRandomlyParameter.Value.Value; }
98      set { SetSeedRandomlyParameter.Value.Value = value; }
99    }
100    #endregion
101
102    #region constructors
103    public ModelSetPreparation() {
104
105      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
106      Parameters.Add(new FixedValueParameter<StringValue>(InputFileParameterName, "The file with set of models that will be.", new StringValue("input.txt")));
107      Parameters.Add(new ConstrainedValueParameter<StringValue>(AlgorithmImplementationTypeParameterName, "The Type of possible algorithm implementation, choose one: OnlyMap, Full, Read."));
108      Parameters.Add(new ConstrainedValueParameter<StringValue>(GoalParameterName, "The goal of algorithm implementation, choose one: ToSee, ToWork, Full."));
109      Parameters.Add(new ConstrainedValueParameter<StringValue>(DistanceTypeParameterName, "The Type of possible distance calculator for case of only distance calculation."));
110      Parameters.Add(new ConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName, "The type of map creation algorithm. Use one from: IslandMap, NetworkMap."));
111      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
112
113      Parameters.Add(new ValueParameter<IRandom>(RandomParameterName, new MersenneTwister()));
114
115      //begin hack ...
116      InputFile.ValueChanged += InputFile_ValueChanged;
117      InfixExpressionParser parser = new InfixExpressionParser();
118      trees = File.ReadAllLines(InputFileParameter.Value.Value).Select(parser.Parse).ToArray();
119      // end hack
120
121      ProblemChanged += ModelSetPreporation_ProblemChanged;
122      MapParameterUpdate();
123
124    }
125
126    // also hack !!!!!!!!!!!!!!!!!!!!!!!!!
127    private void InputFile_ValueChanged(object sender, EventArgs e) {
128      InfixExpressionParser parser = new InfixExpressionParser();
129      trees = File.ReadAllLines(InputFileParameter.Value.Value).Select(parser.Parse);
130    }
131    // remove again !!!!!!!!!!!!!!!!!!!!!!
132
133    private void ModelSetPreporation_ProblemChanged(object sender, EventArgs e) {
134      if (Problem != null) {
135        Problem.SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeBatchInterpreter();
136      }
137    }
138    protected void MapParameterUpdate() {
139
140      var mapTypes = new EMMMapBase<ISymbolicExpressionTree>[]
141      {
142        new EMMZeroMap (),
143        new EMMIslandMap(),
144        new EMMNetworkMap(),
145        new EMMDisatanceMap(),
146        new EMMRankMap(),
147        new EMMSucsessMap ()
148      };
149      foreach (var t in mapTypes) {
150        MapParameter.ValidValues.Add(t);
151      }
152      var algorithmType = new StringValue[]
153        {
154          new StringValue ("DistanceCalculation"),
155          new StringValue ("OnlyMap"),
156          new StringValue ("Statistic")
157        };
158      foreach (var t in algorithmType) {
159        AlgorithmImplementationTypeParameter.ValidValues.Add(t);
160      }
161      var goal = new StringValue[]
162  {
163          new StringValue ("ToWork"),
164          new StringValue ("ToSee"),
165          new StringValue ("Full")
166  };
167      foreach (var t in goal) {
168        GoalParameter.ValidValues.Add(t);
169      }
170      var distanceType = new StringValue[]
171        {
172          new StringValue("MSE"),
173          new StringValue("PearsonsRSquared"),
174          new StringValue ("Covariance"),
175          new StringValue ("MaxAbsoluteError"),
176          new StringValue ("MeanAbsoluteError"),
177          new StringValue ("Symbolic")
178        };
179      foreach (var t in distanceType) {
180        DistanceTypeParameter.ValidValues.Add(t);
181      }
182    }
183
184    protected ModelSetPreparation(ModelSetPreparation original, Cloner cloner) : base(original, cloner) {
185
186      previousExecutionState = original.previousExecutionState;
187      if (original.executionContext != null) {
188        executionContext = cloner.Clone(original.executionContext);
189      }
190      // hack
191      trees = original.trees.Select(x => cloner.Clone(x)).ToArray();
192    }
193
194    [StorableConstructor]
195    protected ModelSetPreparation(StorableConstructorFlag _) : base(_) { }
196    public override IDeepCloneable Clone(Cloner cloner) {
197      return new ModelSetPreparation(this, cloner);
198    }
199    #endregion
200
201    #region technical stuff
202    public override void Prepare() {
203      base.Prepare();
204    }
205
206    protected override void Initialize(CancellationToken cancellationToken) {
207      base.Initialize(cancellationToken);
208    }
209
210    public override bool SupportsPause => true;
211
212    // implements random number generation from https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
213
214    #region operator wiring and events
215    private void ParameterizeStochasticOperator(IOperator op) {
216      IStochasticOperator stochasticOp = op as IStochasticOperator;
217      if (stochasticOp != null) {
218        stochasticOp.RandomParameter.ActualName = "Random";
219        stochasticOp.RandomParameter.Hidden = true;
220      }
221    }
222    protected void ExecuteOperation(ExecutionContext executionContext, CancellationToken cancellationToken, IOperation operation) {
223      Stack<IOperation> executionStack = new Stack<IOperation>();
224      executionStack.Push(operation);
225      while (executionStack.Count > 0) {
226        cancellationToken.ThrowIfCancellationRequested();
227        IOperation next = executionStack.Pop();
228        if (next is OperationCollection coll) {
229          for (int i = coll.Count - 1; i >= 0; i--)
230            if (coll[i] != null) executionStack.Push(coll[i]);
231        } else if (next is IAtomicOperation op) {
232          next = op.Operator.Execute((IExecutionContext)op, cancellationToken);
233          if (next != null) executionStack.Push(next);
234        }
235      }
236    }
237
238    protected override void OnProblemChanged() {
239      base.OnProblemChanged();
240    }
241
242    protected override void OnExecutionStateChanged() {
243      previousExecutionState = currentExecutionState;
244      currentExecutionState = ExecutionState;
245      base.OnExecutionStateChanged();
246    }
247
248    protected override void OnStopped() {
249      if (executionContext != null) {
250        if (executionContext.Scope != null) {
251          if (executionContext.Scope.SubScopes != null) {
252            executionContext.Scope.SubScopes.Clear();
253          }
254        }
255      }
256      base.OnStopped();
257    }
258    #endregion
259    #endregion
260
261    #region algorithm implementation
262    protected override void Run(CancellationToken cancellationToken) {
263      Map.DistanceParametr = DistanceType.Value;
264      //distance calculation or reading that should be done in any cases
265      string fileNameForWatch = "DistanceMatrix_Watch" + DistanceType + ".txt";
266      string fileName = "DistanceMatrix_" + DistanceType + ".txt";
267      double[,] totalDistance;
268      if (AlgorithmImplemetationType.Value == "DistanceCalculation") {
269        totalDistance = TotalDistanceMatrixCalculation(RandomParameter.Value, Problem, trees.ToList(), DistanceType.Value);
270        if (Goal.Value != "ToWork") {
271          FileComuncations.DoubleMatrixPrint(fileNameForWatch, totalDistance, trees.Count());
272        }
273        if (Goal.Value != "ToSee") {
274          FileComuncations.DoubleMatrixSerialize(fileName, totalDistance);
275        }
276      } else {
277        totalDistance = FileComuncations.DoubleMatrixDeserialize(fileName);
278        // totalDistance = FileComuncations.DoubleMatrixFromFileRead(fileName, trees.Count());
279      }
280
281      if (AlgorithmImplemetationType.Value == "Statistic") {
282        var statistic = new int[trees.Count(), trees.Count()];
283        for (int i = 0; i < trees.Count(); i++) {
284          for (int j = 0; j < trees.Count(); j++)
285            statistic[i, j] = 0;
286        }
287        var maps = new List<List<List<int>>>();
288        int repetitionNumber = 10;
289        Map.MapCreationPrepare(trees);
290        for (int i = 0; i < repetitionNumber; i++) {
291          Map.CreateMap(RandomParameter.Value, totalDistance);
292          maps.Add(Map.Map);
293          CheckClusters(statistic);
294          Map.Map.Clear();
295        }
296      } else { // Simple map creation case
297        Map.MapCreationPrepare(trees);
298        Map.CreateMap(RandomParameter.Value, totalDistance);
299        Map.WriteMapToTxtFile(RandomParameter.Value);// This should be deactivated in case of using HIVE. HIVE can not work with it.
300      }
301    }
302    protected void CheckClusters(int[,] info) {
303      // ToDo: It should be realized for statistics collection
304    }
305    #region distance manipulation
306
307    public static double[,] DistanceMatrixCalculation(List<ISymbolicExpressionTree> trees, string distanceType, ISymbolicDataAnalysisSingleObjectiveProblem Problem) {
308      var problemData = (IRegressionProblemData)Problem.ProblemData;
309      var dataset = problemData.Dataset;
310      var rows = problemData.TrainingIndices;
311      var interpreter = Problem.SymbolicExpressionTreeInterpreter;
312      string[] toWrite = new string[trees.Count()];
313      int i = 0;
314      var treeValues = new List<List<double>>();
315      if (distanceType != "Symbolic") {
316        foreach (var tree in trees) {
317          treeValues.Add(interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows).ToList());
318        }
319      }
320      double[,] distances = new double[trees.Count, trees.Count];
321      OnlineCalculatorError err;
322      switch (distanceType) {
323        case "MSE":
324          for (i = 0; i < trees.Count - 1; i++) {
325            for (int j = i + 1; j < trees.Count; j++) {
326              distances[j, i] = distances[i, j] = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(treeValues[i], treeValues[j], out err); ;
327            }
328          }
329          break;
330        case "PearsonsRSquared":
331          for (i = 0; i < trees.Count - 1; i++) {
332            for (int j = i + 1; j < trees.Count; j++) {
333              distances[j, i] = distances[i, j] = OnlinePearsonsRCalculator.Calculate(treeValues[i], treeValues[j], out err); ;
334            }
335          }
336          break;
337        case "Covariance":
338          for (i = 0; i < trees.Count - 1; i++) {
339            for (int j = i + 1; j < trees.Count; j++) {
340              distances[j, i] = distances[i, j] = OnlineCovarianceCalculator.Calculate(treeValues[i], treeValues[j], out err); ;
341            }
342          }
343          break;
344        case "MaxAbsoluteError":
345          for (i = 0; i < trees.Count - 1; i++) {
346            for (int j = i + 1; j < trees.Count; j++) {
347              distances[j, i] = distances[i, j] = OnlineMaxAbsoluteErrorCalculator.Calculate(treeValues[i], treeValues[j], out err); ;
348            }
349          }
350          break;
351        case "MeanAbsoluteError":
352          for (i = 0; i < trees.Count - 1; i++) {
353            for (int j = i + 1; j < trees.Count; j++) {
354              distances[j, i] = distances[i, j] = OnlineMeanAbsoluteErrorCalculator.Calculate(treeValues[i], treeValues[j], out err); ;
355            }
356          }
357          break;
358        case "Symbolic":
359          distances = SymbolicExpressionTreeHash.ComputeSimilarityMatrix(trees, simplify: false, strict: true);
360          for (i = 0; i < trees.Count - 1; i++) {
361            for (int j = i + 1; j < trees.Count; j++) {
362              distances[j, i] = distances[i, j] = 1 - distances[i, j];
363            }
364          }
365          break;
366      }
367
368      return distances;
369    }
370    public static double[,] CalculateDistances(List<ISymbolicExpressionTree> treesSet) {
371      double[,] distances;
372      distances = SymbolicExpressionTreeHash.ComputeSimilarityMatrix(treesSet, simplify: false, strict: true);
373      for (int i = 0; i < treesSet.Count - 1; i++) {
374        for (int j = i + 1; j < treesSet.Count; j++) {
375          distances[j, i] = distances[i, j] = 1 - distances[i, j];
376        }
377      }
378      return distances;
379    }
380    public static double[,] TotalDistanceMatrixCalculation(IRandom random, ISymbolicDataAnalysisSingleObjectiveProblem problem, List<ISymbolicExpressionTree> treesSet, string distanceType) {
381      var setSize = treesSet.Count();
382      var totalDistance = new double[setSize, setSize];
383      var treeSetTemp = new List<ISymbolicExpressionTree>();
384      foreach (var tree in treesSet) {
385        treeSetTemp.Add((ISymbolicExpressionTree)tree.Clone());
386      }
387      if (distanceType != "Symbolic") {
388
389        int repitNumber = 10;
390        totalDistance = new double[setSize, setSize];
391        for (int i = 0; i < setSize; i++) {
392          for (int j = 0; j < setSize; j++) {
393            totalDistance[i, j] = 0;
394          }
395        }
396        for (int i = 0; i < repitNumber; i++) {
397          foreach (var tree in treeSetTemp) {
398            HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
399          }
400          var distanceMatrix = DistanceMatrixCalculation(treeSetTemp, distanceType, problem);
401          for (int t = 0; t < setSize; t++) {
402            for (int j = 0; j < setSize; j++) {
403              totalDistance[t, j] += Math.Abs(distanceMatrix[t, j]) / repitNumber;
404            }
405          }
406        }
407      } else {
408        foreach (var tree in treeSetTemp) {
409          HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
410        }
411        totalDistance = CalculateDistances(treeSetTemp);
412      }
413      return totalDistance;
414    }
415    #endregion
416    #endregion
417  }
418}
Note: See TracBrowser for help on using the repository browser.