Free cookie consent management tool by TermsFeed Policy Generator

source: misc/tools/HeuristicLab.HiveDrain/HeuristicLab.HiveDrain/JobTaskOneFileDownloader.cs @ 15681

Last change on this file since 15681 was 15494, checked in by bburlacu, 6 years ago

#2829: Add option to select the save path for the downloaded jobs. Automatically remove duplicate datasets when the job is saved in a single file. Add button to manually list hive jobs.

File size: 5.3 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.ExceptionServices;
using System.Threading;
using HeuristicLab.Clients.Hive;
using HeuristicLab.Clients.Hive.Jobs;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.HiveDrain {
  /// <summary>
  /// Downloads every task of a Hive job, re-links the downloaded tasks into their
  /// parent/child tree and saves the resulting optimizer(s) into one file.
  /// </summary>
  public class JobTaskOneFileDownloader {
    /// <summary>Target file of <see cref="Start"/>: the constructor's path with ".hl" appended.</summary>
    public String RootLocation { get; set; }

    /// <summary>The Hive job whose tasks are downloaded by <see cref="Start"/>.</summary>
    public Job ParentJob { get; set; }

    private readonly ILog log;

    // Stored from the constructor argument; nothing in this class reads it.
    private readonly Semaphore limitSemaphore;

    // Within this class the shared instance is only used to hook up exception logging
    // (see the static constructor); Start() creates its own TaskDownloader.
    private static ConcurrentTaskDownloader<ItemTask> downloader =
        new ConcurrentTaskDownloader<ItemTask>(HeuristicLabHiveDrainApplication.MaxParallelDownloads, HeuristicLabHiveDrainApplication.MaxParallelDownloads);

    static JobTaskOneFileDownloader() {
      downloader.ExceptionOccured += downloader_ExceptionOccured;
    }

    /// <summary>Writes exceptions raised by the shared downloader to the main window log.</summary>
    static void downloader_ExceptionOccured(object sender, HeuristicLab.Common.EventArgs<Exception> e) {
      HiveDrainMainWindow.Log.LogMessage(DateTime.Now.ToShortTimeString() + " ### Exception occured: " + e.Value);
    }

    /// <summary>Creates a downloader for one Hive job.</summary>
    /// <param name="path">Target path without extension; ".hl" is appended.</param>
    /// <param name="parentJob">The job whose tasks will be downloaded.</param>
    /// <param name="sem">Semaphore handed in by the caller; stored but not used by this class.</param>
    /// <param name="log">Receives progress and status messages.</param>
    public JobTaskOneFileDownloader(string path, Job parentJob, Semaphore sem, ILog log) {
      RootLocation = path + ".hl";
      ParentJob = parentJob;
      limitSemaphore = sem;
      this.log = log;
    }

    /// <summary>
    /// Downloads all tasks of <see cref="ParentJob"/>, blocks until the download has finished,
    /// rebuilds the task tree and saves the root optimizers to <see cref="RootLocation"/>.
    /// Several root optimizers are wrapped in an <see cref="Experiment"/>; a single one is saved
    /// directly. Nothing is saved when no root task is an <see cref="OptimizerTask"/>.
    /// </summary>
    /// <remarks>
    /// If the task downloader reports a fault, its exception is rethrown from this method.
    /// </remarks>
    public void Start() {
      var allTasks = HiveServiceLocator.Instance.CallHiveService(s => s.GetLightweightJobTasksWithoutStateLog(ParentJob.Id));
      var totalJobCount = allTasks.Count;
      var optimizers = new List<IOptimizer>();
      // -1 guarantees that the first loop iteration logs a progress line, even for 0 tasks.
      var finishedCount = -1;
      using (var taskDownloader = new TaskDownloader(allTasks.Select(x => x.Id))) {
        taskDownloader.StartAsync();

        // Poll until the downloader is done and the last progress value has been logged.
        while (!taskDownloader.IsFinished || finishedCount < totalJobCount) {
          if (finishedCount != taskDownloader.FinishedCount) {
            finishedCount = taskDownloader.FinishedCount;
            log.LogMessage(string.Format("Downloading/deserializing tasks... ({0}/{1} finished)", finishedCount, totalJobCount));
          }

          Thread.Sleep(500);

          if (taskDownloader.IsFaulted) {
            // Rethrow the stored exception without overwriting its original stack trace.
            ExceptionDispatchInfo.Capture(taskDownloader.Exception).Throw();
          }
        }

        IDictionary<Guid, HiveTask> allHiveTasks = taskDownloader.Results;
        log.LogMessage("Building hive job tree...");

        // Group the child tasks by parent once, so that building the tree does not
        // rescan the complete task list for every node. OrderBy is stable and ToLookup
        // keeps source order inside each group, so children stay sorted by creation date.
        var childTasksByParent = allTasks
          .Where(x => x.ParentTaskId.HasValue)
          .OrderBy(x => x.DateCreated)
          .ToLookup(x => x.ParentTaskId.Value);

        var parentTasks = allHiveTasks.Values.Where(x => !x.Task.ParentTaskId.HasValue);

        foreach (var parentTask in parentTasks) {
          BuildHiveJobTree(parentTask, childTasksByParent, allHiveTasks);

          var optimizerTask = parentTask.ItemTask as OptimizerTask;

          if (optimizerTask != null) {
            optimizers.Add(optimizerTask.Item);
          }
        }
      }
      if (!optimizers.Any()) return;
      IStorableContent storable;
      if (optimizers.Count > 1) {
        var experiment = new Experiment();
        experiment.Optimizers.AddRange(optimizers);
        storable = experiment;
      } else {
        // A single optimizer is only saved if it is storable content itself.
        storable = optimizers.First() as IStorableContent;
      }
      if (storable != null) {
        // remove duplicate datasets
        log.LogMessage("Removing duplicate datasets...");
        DatasetUtil.RemoveDuplicateDatasets(storable);

        log.LogMessage(string.Format("Save job as {0}", RootLocation));
        ContentManager.Save(storable, RootLocation, true);
      } else {
        log.LogMessage("Could not save job, content is not storable.");
      }
    }

    /// <summary>
    /// Recursively attaches the downloaded child tasks to <paramref name="parentHiveTask"/>.
    /// Each child's own subtree is completed before the child is added to its parent.
    /// </summary>
    /// <param name="parentHiveTask">Node whose children are attached.</param>
    /// <param name="childTasksByParent">Child tasks grouped by parent task id, in attachment order.</param>
    /// <param name="allHiveTasks">Downloaded tasks by id; must contain every child task id.</param>
    private static void BuildHiveJobTree(HiveTask parentHiveTask, ILookup<Guid, LightweightTask> childTasksByParent, IDictionary<Guid, HiveTask> allHiveTasks) {
      // An id without children yields an empty sequence from the lookup.
      foreach (LightweightTask task in childTasksByParent[parentHiveTask.Task.Id]) {
        HiveTask childHiveTask = allHiveTasks[task.Id];
        BuildHiveJobTree(childHiveTask, childTasksByParent, allHiveTasks);
        parentHiveTask.AddChildHiveTask(childHiveTask);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.