1 | using HEAL.Attic;
|
---|
2 | using HeuristicLab.Algorithms.DataAnalysis;
|
---|
3 | using HeuristicLab.Common;
|
---|
4 | using HeuristicLab.Core;
|
---|
5 | using HeuristicLab.Data;
|
---|
6 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
7 | using HeuristicLab.Optimization;
|
---|
8 | using HeuristicLab.Parameters;
|
---|
9 | using HeuristicLab.Problems.DataAnalysis;
|
---|
10 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
11 | using HeuristicLab.Random;
|
---|
12 | using System;
|
---|
13 | using System.Collections.Generic;
|
---|
14 | using System.IO;
|
---|
15 | using System.Linq;
|
---|
16 | using CancellationToken = System.Threading.CancellationToken;
|
---|
17 | using ExecutionContext = HeuristicLab.Core.ExecutionContext;
|
---|
18 |
|
---|
19 | namespace HeuristicLab.Algorithms.EvolvmentModelsOfModels {
|
---|
20 | [Item("ModelSetPreparation", "Model Set preparation algorithm.")]
|
---|
21 | [Creatable(CreatableAttribute.Categories.Algorithms, Priority = 125)]
|
---|
22 | [StorableType("3C5DF308-DB79-4ACD-894B-F795F081726B")]
|
---|
23 | public class ModelSetPreparation : FixedDataAnalysisAlgorithm<ISymbolicDataAnalysisSingleObjectiveProblem> {
|
---|
#region data members
// Execution context of the algorithm; deep-cloned by the cloning constructor,
// and its sub-scopes are cleared in OnStopped.
[Storable]
protected ExecutionContext executionContext;

// State that was current before the most recent execution state change
// (maintained in OnExecutionStateChanged).
[Storable]
protected ExecutionState previousExecutionState;

// Symbolic expression trees parsed from the input file.
[Storable]
protected IEnumerable<ISymbolicExpressionTree> trees;

// State recorded at the most recent execution state change
// (maintained in OnExecutionStateChanged).
[Storable]
protected ExecutionState currentExecutionState;
#endregion
34 |
|
---|
#region parameters
// Keys under which the parameters are registered in the Parameters collection.
private const string SeedParameterName = "Seed";
private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
private const string RandomParameterName = "Random";
private const string InputFileParameterName = "InputFile";
private const string AlgorithmImplementationTypeParameterName = "AlgorithmImplementationType";
private const string GoalParameterName = "Goal";
private const string DistanceTypeParameterName = "DistanceType";
private const string MapParameterName = "Map";

// Typed accessors: each one looks its parameter up by name and casts it to the registered type.
public IFixedValueParameter<IntValue> SeedParameter =>
  (IFixedValueParameter<IntValue>)Parameters[SeedParameterName];

public IConstrainedValueParameter<StringValue> AlgorithmImplementationTypeParameter =>
  (IConstrainedValueParameter<StringValue>)Parameters[AlgorithmImplementationTypeParameterName];

public IConstrainedValueParameter<StringValue> GoalParameter =>
  (IConstrainedValueParameter<StringValue>)Parameters[GoalParameterName];

public IConstrainedValueParameter<StringValue> DistanceTypeParameter =>
  (IConstrainedValueParameter<StringValue>)Parameters[DistanceTypeParameterName];

public IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>> MapParameter =>
  (IConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>)Parameters[MapParameterName];

public IFixedValueParameter<StringValue> InputFileParameter =>
  (IFixedValueParameter<StringValue>)Parameters[InputFileParameterName];

public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter =>
  (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName];

public IValueParameter<IRandom> RandomParameter =>
  (IValueParameter<IRandom>)Parameters[RandomParameterName];
#endregion
70 |
|
---|
#region parameter properties
/// <summary>Seed value stored in the Seed parameter.</summary>
public int Seed {
  get { return SeedParameter.Value.Value; }
  set { SeedParameter.Value.Value = value; }
}

/// <summary>
/// Currently selected algorithm implementation type. The setter selects the valid value
/// whose string equals <c>value.Value</c>.
/// </summary>
public StringValue AlgorithmImplemetationType {
  get { return AlgorithmImplementationTypeParameter.Value; }
  set { SelectValidValue(AlgorithmImplementationTypeParameter, value); }
}

/// <summary>
/// Currently selected goal. The setter selects the valid value whose string equals <c>value.Value</c>.
/// </summary>
public StringValue Goal {
  get { return GoalParameter.Value; }
  set { SelectValidValue(GoalParameter, value); }
}

/// <summary>
/// Currently selected distance type. The setter selects the valid value whose string equals <c>value.Value</c>.
/// </summary>
public StringValue DistanceType {
  get { return DistanceTypeParameter.Value; }
  set { SelectValidValue(DistanceTypeParameter, value); }
}

/// <summary>Currently selected map creation algorithm.</summary>
public EMMMapBase<ISymbolicExpressionTree> Map {
  get { return MapParameter.Value; }
  set { MapParameter.Value = value; }
}

/// <summary>
/// Name of the input file. The setter copies the string into the fixed parameter value
/// (the parameter object itself is never replaced).
/// </summary>
public StringValue InputFile {
  get { return InputFileParameter.Value; }
  set { InputFileParameter.Value.Value = value.Value; }
}

/// <summary>Flag stored in the SetSeedRandomly parameter.</summary>
public bool SetSeedRandomly {
  get { return SetSeedRandomlyParameter.Value.Value; }
  set { SetSeedRandomlyParameter.Value.Value = value; }
}

/// <summary>
/// Selects, in a constrained string parameter, the valid value equal to <paramref name="value"/>.
/// </summary>
/// <remarks>
/// The previous setters overwrote the string inside the currently selected item, which
/// modified a member of the ValidValues set (possibly producing duplicates) instead of
/// changing the selection.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="ArgumentException">No valid value with the requested string exists.</exception>
private static void SelectValidValue(IConstrainedValueParameter<StringValue> parameter, StringValue value) {
  if (value == null) throw new ArgumentNullException(nameof(value));
  var match = parameter.ValidValues.FirstOrDefault(v => v.Value == value.Value);
  if (match == null) {
    throw new ArgumentException(
      "'" + value.Value + "' is not a valid value for parameter '" + parameter.Name + "'.", nameof(value));
  }
  parameter.Value = match;
}
#endregion
101 |
|
---|
102 | #region constructors
|
---|
/// <summary>
/// Creates the algorithm with its default parameters, loads the model set from the
/// default input file (if it exists) and registers the event handlers.
/// </summary>
public ModelSetPreparation() {
  Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
  Parameters.Add(new FixedValueParameter<StringValue>(InputFileParameterName, "The file with the set of models (one infix expression per line) that will be used for map creation.", new StringValue("input.txt")));
  Parameters.Add(new ConstrainedValueParameter<StringValue>(AlgorithmImplementationTypeParameterName, "The Type of possible algorithm implementation, choose one: DistanceCalculation, OnlyMap, Statistic."));
  Parameters.Add(new ConstrainedValueParameter<StringValue>(GoalParameterName, "The goal of algorithm implementation, choose one: ToSee, ToWork, Full."));
  Parameters.Add(new ConstrainedValueParameter<StringValue>(DistanceTypeParameterName, "The Type of possible distance calculator for case of only distance calculation."));
  Parameters.Add(new ConstrainedValueParameter<EMMMapBase<ISymbolicExpressionTree>>(MapParameterName, "The type of map creation algorithm. Use one from: IslandMap, NetworkMap."));
  Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));

  Parameters.Add(new ValueParameter<IRandom>(RandomParameterName, new MersenneTwister()));

  // begin hack (marked for removal by the original author): the model set is read directly from a file.
  InputFile.ValueChanged += InputFile_ValueChanged;
  // The default input file need not exist when the algorithm is instantiated;
  // start with an empty model set instead of failing construction.
  trees = File.Exists(InputFile.Value)
    ? ReadTreesFromFile(InputFile.Value)
    : new ISymbolicExpressionTree[0];
  // end hack

  ProblemChanged += ModelSetPreporation_ProblemChanged;
  MapParameterUpdate();
}

// also hack (marked for removal by the original author)
/// <summary>Reloads the model set whenever the input file name changes.</summary>
private void InputFile_ValueChanged(object sender, EventArgs e) {
  trees = ReadTreesFromFile(InputFileParameter.Value.Value);
}

/// <summary>
/// Reads <paramref name="path"/> line by line and parses every line as an infix expression.
/// </summary>
/// <remarks>
/// The result is materialized into an array so that the trees are parsed exactly once;
/// a deferred query would re-parse (and create new tree instances) on every enumeration.
/// </remarks>
private static ISymbolicExpressionTree[] ReadTreesFromFile(string path) {
  var parser = new InfixExpressionParser();
  return File.ReadAllLines(path).Select(parser.Parse).ToArray();
}
// remove again (end of hack)
132 |
|
---|
/// <summary>
/// Assigns a new batch interpreter to the problem whenever a (non-null) problem is set.
/// </summary>
private void ModelSetPreporation_ProblemChanged(object sender, EventArgs e) {
  if (Problem == null) return;
  Problem.SymbolicExpressionTreeInterpreter = new SymbolicDataAnalysisExpressionTreeBatchInterpreter();
}
/// <summary>
/// Fills the valid-value sets of the Map, AlgorithmImplementationType, Goal and
/// DistanceType parameters. Values are added in the order listed below.
/// </summary>
protected void MapParameterUpdate() {
  EMMMapBase<ISymbolicExpressionTree>[] availableMaps = {
    new EMMZeroMap(),
    new EMMIslandMap(),
    new EMMNetworkMap(),
    new EMMDisatanceMap(),
    new EMMRankMap(),
    new EMMSucsessMap()
  };
  foreach (var map in availableMaps) {
    MapParameter.ValidValues.Add(map);
  }

  AddStringOptions(AlgorithmImplementationTypeParameter, "DistanceCalculation", "OnlyMap", "Statistic");
  AddStringOptions(GoalParameter, "ToWork", "ToSee", "Full");
  AddStringOptions(DistanceTypeParameter,
    "MSE", "PearsonsRSquared", "Covariance", "MaxAbsoluteError", "MeanAbsoluteError", "Symbolic");
}

/// <summary>
/// Wraps every option in a StringValue (all created up front) and adds them, in order,
/// to the parameter's valid values.
/// </summary>
private static void AddStringOptions(IConstrainedValueParameter<StringValue> parameter, params string[] options) {
  var wrapped = new StringValue[options.Length];
  for (int k = 0; k < options.Length; k++) {
    wrapped[k] = new StringValue(options[k]);
  }
  foreach (var option in wrapped) {
    parameter.ValidValues.Add(option);
  }
}
183 |
|
---|
/// <summary>
/// Cloning constructor: copies both recorded execution states, deep-clones the execution
/// context and the trees (when present) and attaches the event handlers to the clone.
/// </summary>
protected ModelSetPreparation(ModelSetPreparation original, Cloner cloner) : base(original, cloner) {
  previousExecutionState = original.previousExecutionState;
  // Was previously left at its default value in clones.
  currentExecutionState = original.currentExecutionState;
  if (original.executionContext != null) {
    executionContext = cloner.Clone(original.executionContext);
  }
  // hack (see parameterless constructor); guarded so an instance without trees can still be cloned
  if (original.trees != null) {
    trees = original.trees.Select(x => cloner.Clone(x)).ToArray();
  }
  // These handlers are otherwise only attached in the parameterless constructor,
  // which does not run for clones.
  InputFile.ValueChanged += InputFile_ValueChanged;
  ProblemChanged += ModelSetPreporation_ProblemChanged;
}
193 |
|
---|
// Constructor marked with [StorableConstructor]; it only forwards the flag to the base class.
[StorableConstructor]
protected ModelSetPreparation(StorableConstructorFlag _) : base(_) {
}

/// <summary>Creates a deep copy of this algorithm through the cloning constructor.</summary>
public override IDeepCloneable Clone(Cloner cloner) => new ModelSetPreparation(this, cloner);
199 | #endregion
|
---|
200 |
|
---|
201 | #region technical stuff
|
---|
/// <summary>Delegates to the base implementation without additional work.</summary>
public override void Prepare() => base.Prepare();

/// <summary>Delegates to the base implementation without additional work.</summary>
protected override void Initialize(CancellationToken cancellationToken) => base.Initialize(cancellationToken);

/// <summary>This algorithm reports that it supports pausing.</summary>
public override bool SupportsPause {
  get { return true; }
}
211 |
|
---|
// NOTE: the Dirichlet-distribution random number generation referenced here
// (https://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation)
// is not implemented in this class.
213 |
|
---|
214 | #region operator wiring and events
|
---|
/// <summary>
/// If <paramref name="op"/> is a stochastic operator, points its random parameter at
/// "Random" and hides that parameter; other operators are left untouched.
/// </summary>
private void ParameterizeStochasticOperator(IOperator op) {
  if (!(op is IStochasticOperator stochastic)) return;
  stochastic.RandomParameter.ActualName = "Random";
  stochastic.RandomParameter.Hidden = true;
}
/// <summary>
/// Executes <paramref name="operation"/> and every operation it produces, using an explicit stack.
/// </summary>
/// <remarks>
/// Operation collections are expanded so that their first element is executed first.
/// Cancellation is checked before each operation is taken from the stack.
/// The <paramref name="executionContext"/> argument is not used by this method.
/// </remarks>
protected void ExecuteOperation(ExecutionContext executionContext, CancellationToken cancellationToken, IOperation operation) {
  var pending = new Stack<IOperation>();
  pending.Push(operation);
  while (pending.Count != 0) {
    cancellationToken.ThrowIfCancellationRequested();
    IOperation current = pending.Pop();
    switch (current) {
      case OperationCollection collection: {
          // Push back to front so the first element ends up on top of the stack.
          int index = collection.Count;
          while (index-- > 0) {
            var child = collection[index];
            if (child != null) pending.Push(child);
          }
          break;
        }
      case IAtomicOperation atomic: {
          IOperation successor = atomic.Operator.Execute((IExecutionContext)atomic, cancellationToken);
          if (successor != null) pending.Push(successor);
          break;
        }
    }
  }
}
237 |
|
---|
/// <summary>Delegates to the base implementation without additional work.</summary>
protected override void OnProblemChanged() => base.OnProblemChanged();
241 |
|
---|
/// <summary>
/// Shifts the recorded states (current becomes previous, the new ExecutionState becomes
/// current) before the base implementation is invoked.
/// </summary>
protected override void OnExecutionStateChanged() {
  ExecutionState lastKnown = currentExecutionState;
  previousExecutionState = lastKnown;
  currentExecutionState = ExecutionState;
  base.OnExecutionStateChanged();
}
247 |
|
---|
/// <summary>
/// Clears the sub-scopes of the execution context's scope (when context, scope and
/// sub-scope list all exist), then invokes the base implementation.
/// </summary>
protected override void OnStopped() {
  executionContext?.Scope?.SubScopes?.Clear();
  base.OnStopped();
}
258 | #endregion
|
---|
259 | #endregion
|
---|
260 |
|
---|
261 | #region algorithm implementation
|
---|
/// <summary>
/// Obtains the distance matrix of the model set (computed or deserialized, depending on the
/// implementation type) and then creates the map, either once or repeatedly in the
/// "Statistic" case.
/// </summary>
/// <param name="cancellationToken">Checked before every repetition of the statistic loop.</param>
protected override void Run(CancellationToken cancellationToken) {
  string distanceTypeName = DistanceType.Value;
  string implementationType = AlgorithmImplemetationType.Value;
  Map.DistanceParametr = distanceTypeName;

  // Enumerate the tree sequence exactly once; it was previously re-enumerated by every
  // Count()/ToList() call.
  var treeList = trees.ToList();

  // NOTE(review): Seed / SetSeedRandomly are never applied to RandomParameter.Value in this
  // class - confirm whether the generator should be reset here.

  // distance calculation or reading that should be done in any cases
  string fileNameForWatch = "DistanceMatrix_Watch" + distanceTypeName + ".txt";
  string fileName = "DistanceMatrix_" + distanceTypeName + ".txt";
  double[,] totalDistance;
  if (implementationType == "DistanceCalculation") {
    totalDistance = TotalDistanceMatrixCalculation(RandomParameter.Value, Problem, treeList, distanceTypeName);
    string goal = Goal.Value;
    if (goal != "ToWork") {
      FileComuncations.DoubleMatrixPrint(fileNameForWatch, totalDistance, treeList.Count);
    }
    if (goal != "ToSee") {
      FileComuncations.DoubleMatrixSerialize(fileName, totalDistance);
    }
  } else {
    // Alternative kept from the original as a comment:
    // FileComuncations.DoubleMatrixFromFileRead(fileName, treeList.Count)
    totalDistance = FileComuncations.DoubleMatrixDeserialize(fileName);
  }

  Map.MapCreationPrepare(treeList);
  if (implementationType == "Statistic") {
    // New arrays are zero-initialized, no explicit clearing needed.
    var statistic = new int[treeList.Count, treeList.Count];
    var maps = new List<List<List<int>>>();
    const int repetitionNumber = 10;
    for (int i = 0; i < repetitionNumber; i++) {
      cancellationToken.ThrowIfCancellationRequested();
      Map.CreateMap(RandomParameter.Value, totalDistance);
      // Store a copy: Map.Map is cleared below, which would also empty a stored reference.
      maps.Add(Map.Map.Select(cluster => cluster.ToList()).ToList());
      CheckClusters(statistic);
      Map.Map.Clear();
    }
  } else { // Simple map creation case
    Map.CreateMap(RandomParameter.Value, totalDistance);
    Map.WriteMapToTxtFile(RandomParameter.Value);// This should be deactivated in case of using HIVE. HIVE can not work with it.
  }
}
/// <summary>
/// Placeholder for statistics collection; currently does nothing with <paramref name="info"/>.
/// </summary>
protected void CheckClusters(int[,] info) {
  // ToDo: It should be realized for statistics collection
}
305 | #region distance manipulation
|
---|
306 |
|
---|
/// <summary>
/// Computes the symmetric pairwise distance matrix of <paramref name="trees"/>.
/// </summary>
/// <param name="trees">Models to compare.</param>
/// <param name="distanceType">
/// One of "MSE", "PearsonsRSquared", "Covariance", "MaxAbsoluteError", "MeanAbsoluteError"
/// or "Symbolic". Any other value yields an all-zero matrix.
/// </param>
/// <param name="Problem">
/// Supplies the interpreter and the problem data; the problem data is cast to
/// IRegressionProblemData, and the trees are evaluated on its training indices.
/// </param>
/// <returns>
/// A square matrix in which only off-diagonal entries are written by this method.
/// For "Symbolic" the result of <c>CalculateDistances</c> is returned.
/// </returns>
public static double[,] DistanceMatrixCalculation(List<ISymbolicExpressionTree> trees, string distanceType, ISymbolicDataAnalysisSingleObjectiveProblem Problem) {
  var problemData = (IRegressionProblemData)Problem.ProblemData;
  var dataset = problemData.Dataset;
  var rows = problemData.TrainingIndices;
  var interpreter = Problem.SymbolicExpressionTreeInterpreter;

  if (distanceType == "Symbolic") {
    // Same computation the dedicated helper performs (1 - structural similarity).
    return CalculateDistances(trees);
  }

  // Evaluate every tree once; the value-based measures compare these output vectors.
  var treeValues = new List<List<double>>(trees.Count);
  foreach (var tree in trees) {
    treeValues.Add(interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows).ToList());
  }

  int n = trees.Count;
  var distances = new double[n, n];
  var measure = SelectDistanceMeasure(distanceType);
  if (measure == null) return distances; // unknown type: all-zero matrix

  for (int i = 0; i < n - 1; i++) {
    for (int j = i + 1; j < n; j++) {
      distances[j, i] = distances[i, j] = measure(treeValues[i], treeValues[j]);
    }
  }
  return distances;
}

/// <summary>
/// Maps a value-based distance type name to the calculator used for one pair of output
/// vectors; returns null for unknown names. The calculators' error state is ignored.
/// </summary>
private static Func<IEnumerable<double>, IEnumerable<double>, double> SelectDistanceMeasure(string distanceType) {
  switch (distanceType) {
    case "MSE":
      return (a, b) => {
        OnlineCalculatorError err;
        return OnlineNormalizedMeanSquaredErrorCalculator.Calculate(a, b, out err);
      };
    case "PearsonsRSquared":
      // NOTE(review): this calls OnlinePearsonsRCalculator although the option is named
      // "PearsonsRSquared" - confirm that R (not R squared) is intended.
      return (a, b) => {
        OnlineCalculatorError err;
        return OnlinePearsonsRCalculator.Calculate(a, b, out err);
      };
    case "Covariance":
      return (a, b) => {
        OnlineCalculatorError err;
        return OnlineCovarianceCalculator.Calculate(a, b, out err);
      };
    case "MaxAbsoluteError":
      return (a, b) => {
        OnlineCalculatorError err;
        return OnlineMaxAbsoluteErrorCalculator.Calculate(a, b, out err);
      };
    case "MeanAbsoluteError":
      return (a, b) => {
        OnlineCalculatorError err;
        return OnlineMeanAbsoluteErrorCalculator.Calculate(a, b, out err);
      };
    default:
      return null;
  }
}
/// <summary>
/// Computes the similarity matrix of <paramref name="treesSet"/> and replaces every
/// off-diagonal entry by one minus that similarity. Diagonal entries are left as returned
/// by the similarity computation.
/// </summary>
public static double[,] CalculateDistances(List<ISymbolicExpressionTree> treesSet) {
  var result = SymbolicExpressionTreeHash.ComputeSimilarityMatrix(treesSet, simplify: false, strict: true);
  for (int row = 0; row < treesSet.Count - 1; row++) {
    for (int col = row + 1; col < treesSet.Count; col++) {
      double dissimilarity = 1 - result[row, col];
      result[row, col] = dissimilarity;
      result[col, row] = dissimilarity;
    }
  }
  return result;
}
/// <summary>
/// Computes the distance matrix used for map creation on clones of <paramref name="treesSet"/>;
/// the passed trees themselves are not handed to the parameter-setting helper.
/// </summary>
/// <remarks>
/// For "Symbolic": the local parameters of the clones are set once via
/// <c>HelpFunctions.SetLocalParametersForTree</c>, then <c>CalculateDistances</c> is returned.
/// For every other type: the local parameters are re-set and the distance matrix recomputed
/// ten times; the result is the mean of the absolute distances over those repetitions.
/// </remarks>
public static double[,] TotalDistanceMatrixCalculation(IRandom random, ISymbolicDataAnalysisSingleObjectiveProblem problem, List<ISymbolicExpressionTree> treesSet, string distanceType) {
  const int repetitions = 10;

  var workingTrees = new List<ISymbolicExpressionTree>(treesSet.Count);
  foreach (var tree in treesSet) {
    workingTrees.Add((ISymbolicExpressionTree)tree.Clone());
  }

  if (distanceType == "Symbolic") {
    foreach (var tree in workingTrees) {
      HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
    }
    return CalculateDistances(workingTrees);
  }

  int setSize = treesSet.Count;
  // A new array is already zero-filled; it serves directly as the accumulator.
  var totalDistance = new double[setSize, setSize];
  for (int repetition = 0; repetition < repetitions; repetition++) {
    foreach (var tree in workingTrees) {
      HelpFunctions.SetLocalParametersForTree(random, 0.5, tree);
    }
    var distanceMatrix = DistanceMatrixCalculation(workingTrees, distanceType, problem);
    for (int t = 0; t < setSize; t++) {
      for (int j = 0; j < setSize; j++) {
        totalDistance[t, j] += Math.Abs(distanceMatrix[t, j]) / repetitions;
      }
    }
  }
  return totalDistance;
}
415 | #endregion
|
---|
416 | #endregion
|
---|
417 | }
|
---|
418 | }
|
---|