source: branches/UnloadJobs/HeuristicLab.Clients.Hive/3.3/HiveClient.cs @ 9202

Last change on this file since 9202 was 9202, checked in by ascheibe, 7 years ago

#2005

  • added repeating of failed tasks and task data
  • added an additional web service method that loads lightweight tasks without the state logs
File size: 22.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Configuration;
25using System.IO;
26using System.Linq;
27using System.Security.Cryptography;
28using System.Threading;
29using System.Threading.Tasks;
30using HeuristicLab.Common;
31using HeuristicLab.Core;
32using HeuristicLab.MainForm;
33using HeuristicLab.PluginInfrastructure;
34using TS = System.Threading.Tasks;
35
36namespace HeuristicLab.Clients.Hive {
37  [Item("HiveClient", "Hive client.")]
38  public sealed class HiveClient : IContent {
39    private static HiveClient instance;
40    public static HiveClient Instance {
41      get {
42        if (instance == null) instance = new HiveClient();
43        return instance;
44      }
45    }
46
47    #region Properties
48    private HiveItemCollection<RefreshableJob> jobs;
49    public HiveItemCollection<RefreshableJob> Jobs {
50      get { return jobs; }
51      set {
52        if (value != jobs) {
53          jobs = value;
54          OnHiveJobsChanged();
55        }
56      }
57    }
58
59    private List<Plugin> onlinePlugins;
60    public List<Plugin> OnlinePlugins {
61      get { return onlinePlugins; }
62      set { onlinePlugins = value; }
63    }
64
65    private List<Plugin> alreadyUploadedPlugins;
66    public List<Plugin> AlreadyUploadedPlugins {
67      get { return alreadyUploadedPlugins; }
68      set { alreadyUploadedPlugins = value; }
69    }
70
71    private bool isAllowedPrivileged;
72    public bool IsAllowedPrivileged {
73      get { return isAllowedPrivileged; }
74      set { isAllowedPrivileged = value; }
75    }
76    #endregion
77
78    private HiveClient() {
79      //this will never be deregistered
80      TaskScheduler.UnobservedTaskException += new EventHandler<UnobservedTaskExceptionEventArgs>(TaskScheduler_UnobservedTaskException);
81    }
82
83    private void TaskScheduler_UnobservedTaskException(object sender, UnobservedTaskExceptionEventArgs e) {
84      e.SetObserved(); // avoid crash of process because task crashes. first exception found is handled in Results property
85      throw new HiveException("Unobserved Exception in ConcurrentTaskDownloader", e.Exception);
86    }
87
88    public void ClearHiveClient() {
89      Jobs.ClearWithoutHiveDeletion();
90      foreach (var j in Jobs) {
91        if (j.RefreshAutomatically) {
92          j.RefreshAutomatically = false; // stop result polling
93        }
94        j.Dispose();
95      }
96      Jobs = null;
97
98      if (onlinePlugins != null)
99        onlinePlugins.Clear();
100      if (alreadyUploadedPlugins != null)
101        alreadyUploadedPlugins.Clear();
102    }
103
104    #region Refresh
105    public void Refresh() {
106      OnRefreshing();
107
108      try {
109        IsAllowedPrivileged = HiveServiceLocator.Instance.CallHiveService((s) => s.IsAllowedPrivileged());
110
111        jobs = new HiveItemCollection<RefreshableJob>();
112        var jobsLoaded = HiveServiceLocator.Instance.CallHiveService<IEnumerable<Job>>(s => s.GetJobs());
113
114        foreach (var j in jobsLoaded) {
115          jobs.Add(new RefreshableJob(j) { IsAllowedPrivileged = this.isAllowedPrivileged });
116        }
117      }
118      catch {
119        jobs = null;
120        throw;
121      }
122      finally {
123        OnRefreshed();
124      }
125    }
126
127    public void RefreshAsync(Action<Exception> exceptionCallback) {
128      var call = new Func<Exception>(delegate() {
129        try {
130          Refresh();
131        }
132        catch (Exception ex) {
133          return ex;
134        }
135        return null;
136      });
137      call.BeginInvoke(delegate(IAsyncResult result) {
138        Exception ex = call.EndInvoke(result);
139        if (ex != null) exceptionCallback(ex);
140      }, null);
141    }
142    #endregion
143
144    #region Store
145    public static void Store(IHiveItem item, CancellationToken cancellationToken) {
146      if (item.Id == Guid.Empty) {
147        if (item is RefreshableJob) {
148          HiveClient.Instance.UploadJob((RefreshableJob)item, cancellationToken);
149        }
150        if (item is JobPermission) {
151          var hep = (JobPermission)item;
152          hep.GrantedUserId = HiveServiceLocator.Instance.CallHiveService((s) => s.GetUserIdByUsername(hep.GrantedUserName));
153          if (hep.GrantedUserId == Guid.Empty) {
154            throw new ArgumentException(string.Format("The user {0} was not found.", hep.GrantedUserName));
155          }
156          HiveServiceLocator.Instance.CallHiveService((s) => s.GrantPermission(hep.JobId, hep.GrantedUserId, hep.Permission));
157        }
158      } else {
159        if (item is Job)
160          HiveServiceLocator.Instance.CallHiveService(s => s.UpdateJob((Job)item));
161      }
162    }
163    public static void StoreAsync(Action<Exception> exceptionCallback, IHiveItem item, CancellationToken cancellationToken) {
164      var call = new Func<Exception>(delegate() {
165        try {
166          Store(item, cancellationToken);
167        }
168        catch (Exception ex) {
169          return ex;
170        }
171        return null;
172      });
173      call.BeginInvoke(delegate(IAsyncResult result) {
174        Exception ex = call.EndInvoke(result);
175        if (ex != null) exceptionCallback(ex);
176      }, null);
177    }
178    #endregion
179
180    #region Delete
181    public static void Delete(IHiveItem item) {
182      if (item.Id == Guid.Empty && item.GetType() != typeof(JobPermission))
183        return;
184
185      if (item is Job)
186        HiveServiceLocator.Instance.CallHiveService(s => s.DeleteJob(item.Id));
187      if (item is RefreshableJob) {
188        RefreshableJob job = (RefreshableJob)item;
189        if (job.RefreshAutomatically) {
190          job.StopResultPolling();
191        }
192        HiveServiceLocator.Instance.CallHiveService(s => s.DeleteJob(item.Id));
193      }
194      if (item is JobPermission) {
195        var hep = (JobPermission)item;
196        HiveServiceLocator.Instance.CallHiveService(s => s.RevokePermission(hep.JobId, hep.GrantedUserId));
197      }
198      item.Id = Guid.Empty;
199    }
200    #endregion
201
202    #region Events
203    public event EventHandler Refreshing;
204    private void OnRefreshing() {
205      EventHandler handler = Refreshing;
206      if (handler != null) handler(this, EventArgs.Empty);
207    }
208    public event EventHandler Refreshed;
209    private void OnRefreshed() {
210      var handler = Refreshed;
211      if (handler != null) handler(this, EventArgs.Empty);
212    }
213    public event EventHandler HiveJobsChanged;
214    private void OnHiveJobsChanged() {
215      var handler = HiveJobsChanged;
216      if (handler != null) handler(this, EventArgs.Empty);
217    }
218    #endregion
219
220    public static void StartJob(Action<Exception> exceptionCallback, RefreshableJob refreshableJob, CancellationToken cancellationToken) {
221      HiveClient.StoreAsync(
222        new Action<Exception>((Exception ex) => {
223          refreshableJob.ExecutionState = ExecutionState.Prepared;
224          exceptionCallback(ex);
225        }), refreshableJob, cancellationToken);
226      refreshableJob.ExecutionState = ExecutionState.Started;
227    }
228
229    public static void PauseJob(RefreshableJob refreshableJob) {
230      HiveServiceLocator.Instance.CallHiveService(service => {
231        foreach (HiveTask task in refreshableJob.GetAllHiveTasks()) {
232          if (task.Task.State != TaskState.Finished && task.Task.State != TaskState.Aborted && task.Task.State != TaskState.Failed)
233            service.PauseTask(task.Task.Id);
234        }
235      });
236      refreshableJob.ExecutionState = ExecutionState.Paused;
237    }
238
239    public static void StopJob(RefreshableJob refreshableJob) {
240      HiveServiceLocator.Instance.CallHiveService(service => {
241        foreach (HiveTask task in refreshableJob.GetAllHiveTasks()) {
242          if (task.Task.State != TaskState.Finished && task.Task.State != TaskState.Aborted && task.Task.State != TaskState.Failed)
243            service.StopTask(task.Task.Id);
244        }
245      });
246      refreshableJob.ExecutionState = ExecutionState.Stopped;
247    }
248
249    public static void ResumeJob(RefreshableJob refreshableJob) {
250      HiveServiceLocator.Instance.CallHiveService(service => {
251        foreach (HiveTask task in refreshableJob.GetAllHiveTasks()) {
252          if (task.Task.State == TaskState.Paused) {
253            service.RestartTask(task.Task.Id);
254          }
255        }
256      });
257      refreshableJob.ExecutionState = ExecutionState.Started;
258    }
259
260    #region Upload Job
261    private Semaphore taskUploadSemaphore = new Semaphore(Settings.Default.MaxParallelUploads, Settings.Default.MaxParallelUploads);
262    private static object jobCountLocker = new object();
263    private static object pluginLocker = new object();
264    private void UploadJob(RefreshableJob refreshableJob, CancellationToken cancellationToken) {
265      try {
266        refreshableJob.IsProgressing = true;
267        refreshableJob.Progress = new Progress("Connecting to server...");
268        IEnumerable<string> resourceNames = ToResourceNameList(refreshableJob.Job.ResourceNames);
269        var resourceIds = new List<Guid>();
270        foreach (var resourceName in resourceNames) {
271          Guid resourceId = HiveServiceLocator.Instance.CallHiveService((s) => s.GetResourceId(resourceName));
272          if (resourceId == Guid.Empty) {
273            throw new ResourceNotFoundException(string.Format("Could not find the resource '{0}'", resourceName));
274          }
275          resourceIds.Add(resourceId);
276        }
277
278        foreach (OptimizerHiveTask hiveJob in refreshableJob.HiveTasks.OfType<OptimizerHiveTask>()) {
279          hiveJob.SetIndexInParentOptimizerList(null);
280        }
281
282        // upload Job
283        refreshableJob.Progress.Status = "Uploading Job...";
284        refreshableJob.Job.Id = HiveServiceLocator.Instance.CallHiveService((s) => s.AddJob(refreshableJob.Job));
285        bool isPrivileged = refreshableJob.Job.IsPrivileged;
286        refreshableJob.Job = HiveServiceLocator.Instance.CallHiveService((s) => s.GetJob(refreshableJob.Job.Id)); // update owner and permissions
287        refreshableJob.Job.IsPrivileged = isPrivileged;
288        cancellationToken.ThrowIfCancellationRequested();
289
290        int totalJobCount = refreshableJob.GetAllHiveTasks().Count();
291        int[] jobCount = new int[1]; // use a reference type (int-array) instead of value type (int) in order to pass the value via a delegate to task-parallel-library
292        cancellationToken.ThrowIfCancellationRequested();
293
294        // upload plugins
295        refreshableJob.Progress.Status = "Uploading plugins...";
296        this.OnlinePlugins = HiveServiceLocator.Instance.CallHiveService((s) => s.GetPlugins());
297        this.AlreadyUploadedPlugins = new List<Plugin>();
298        Plugin configFilePlugin = HiveServiceLocator.Instance.CallHiveService((s) => UploadConfigurationFile(s, onlinePlugins));
299        this.alreadyUploadedPlugins.Add(configFilePlugin);
300        cancellationToken.ThrowIfCancellationRequested();
301
302        // upload tasks
303        refreshableJob.Progress.Status = "Uploading tasks...";
304
305        var tasks = new List<TS.Task>();
306        foreach (HiveTask hiveTask in refreshableJob.HiveTasks) {
307          var task = TS.Task.Factory.StartNew((hj) => {
308            UploadTaskWithChildren(refreshableJob.Progress, (HiveTask)hj, null, resourceIds, jobCount, totalJobCount, configFilePlugin.Id, refreshableJob.Job.Id, refreshableJob.Log, refreshableJob.Job.IsPrivileged, cancellationToken);
309          }, hiveTask);
310          task.ContinueWith((x) => refreshableJob.Log.LogException(x.Exception), TaskContinuationOptions.OnlyOnFaulted);
311          tasks.Add(task);
312        }
313        TS.Task.WaitAll(tasks.ToArray());
314      }
315      finally {
316        //refreshableJob.RefreshAutomatically = true;       
317        refreshableJob.Job.Modified = false;
318        refreshableJob.IsProgressing = false;
319        refreshableJob.Progress.Finish();
320      }
321    }
322
323    /// <summary>
324    /// Uploads the local configuration file as plugin
325    /// </summary>
326    private static Plugin UploadConfigurationFile(IHiveService service, List<Plugin> onlinePlugins) {
327      string exeFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, Settings.Default.HLBinaryName);
328      string configFileName = Path.GetFileName(ConfigurationManager.OpenExeConfiguration(exeFilePath).FilePath);
329      string configFilePath = ConfigurationManager.OpenExeConfiguration(exeFilePath).FilePath;
330      byte[] hash;
331
332      byte[] data = File.ReadAllBytes(configFilePath);
333      using (SHA1 sha1 = SHA1.Create()) {
334        hash = sha1.ComputeHash(data);
335      }
336
337      Plugin configPlugin = new Plugin() { Name = "Configuration", Version = new Version(), Hash = hash };
338      PluginData configFile = new PluginData() { FileName = configFileName, Data = data };
339
340      IEnumerable<Plugin> onlineConfig = onlinePlugins.Where(p => p.Hash.SequenceEqual(hash));
341
342      if (onlineConfig.Count() > 0) {
343        return onlineConfig.First();
344      } else {
345        configPlugin.Id = service.AddPlugin(configPlugin, new List<PluginData> { configFile });
346        return configPlugin;
347      }
348    }
349
350    /// <summary>
351    /// Uploads the given task and all its child-jobs while setting the proper parentJobId values for the childs
352    /// </summary>
353    /// <param name="parentHiveTask">shall be null if its the root task</param>
354    private void UploadTaskWithChildren(Progress progress, HiveTask hiveTask, HiveTask parentHiveTask, IEnumerable<Guid> groups, int[] taskCount, int totalJobCount, Guid configPluginId, Guid jobId, ILog log, bool isPrivileged, CancellationToken cancellationToken) {
355      taskUploadSemaphore.WaitOne();
356      bool semaphoreReleased = false;
357      try {
358        cancellationToken.ThrowIfCancellationRequested();
359        lock (jobCountLocker) {
360          taskCount[0]++;
361        }
362        TaskData taskData;
363        List<IPluginDescription> plugins;
364
365        if (hiveTask.ItemTask.ComputeInParallel && (hiveTask.ItemTask.Item is Optimization.Experiment || hiveTask.ItemTask.Item is Optimization.BatchRun)) {
366          hiveTask.Task.IsParentTask = true;
367          hiveTask.Task.FinishWhenChildJobsFinished = true;
368          taskData = hiveTask.GetAsTaskData(true, out plugins);
369        } else {
370          hiveTask.Task.IsParentTask = false;
371          hiveTask.Task.FinishWhenChildJobsFinished = false;
372          taskData = hiveTask.GetAsTaskData(false, out plugins);
373        }
374        cancellationToken.ThrowIfCancellationRequested();
375
376        TryAndRepeat(() => {
377          if (!cancellationToken.IsCancellationRequested) {
378            lock (pluginLocker) {
379              HiveServiceLocator.Instance.CallHiveService((s) => hiveTask.Task.PluginsNeededIds = PluginUtil.GetPluginDependencies(s, this.onlinePlugins, this.alreadyUploadedPlugins, plugins));
380            }
381          }
382        }, Settings.Default.MaxRepeatServiceCalls, "Failed to upload plugins");
383        cancellationToken.ThrowIfCancellationRequested();
384        hiveTask.Task.PluginsNeededIds.Add(configPluginId);
385        hiveTask.Task.JobId = jobId;
386        hiveTask.Task.IsPrivileged = isPrivileged;
387
388        log.LogMessage(string.Format("Uploading task ({0} kb, {1} objects)", taskData.Data.Count() / 1024, hiveTask.ItemTask.GetObjectGraphObjects().Count()));
389        TryAndRepeat(() => {
390          if (!cancellationToken.IsCancellationRequested) {
391            if (parentHiveTask != null) {
392              hiveTask.Task.Id = HiveServiceLocator.Instance.CallHiveService((s) => s.AddChildTask(parentHiveTask.Task.Id, hiveTask.Task, taskData));
393            } else {
394              hiveTask.Task.Id = HiveServiceLocator.Instance.CallHiveService((s) => s.AddTask(hiveTask.Task, taskData, groups.ToList()));
395            }
396          }
397        }, Settings.Default.MaxRepeatServiceCalls, "Failed to add task", log);
398        cancellationToken.ThrowIfCancellationRequested();
399
400        lock (jobCountLocker) {
401          progress.ProgressValue = (double)taskCount[0] / totalJobCount;
402          progress.Status = string.Format("Uploaded task ({0} of {1})", taskCount[0], totalJobCount);
403        }
404
405        var tasks = new List<TS.Task>();
406        foreach (HiveTask child in hiveTask.ChildHiveTasks) {
407          var task = TS.Task.Factory.StartNew((tuple) => {
408            var arguments = (Tuple<HiveTask, HiveTask>)tuple;
409            UploadTaskWithChildren(progress, arguments.Item1, arguments.Item2, groups, taskCount, totalJobCount, configPluginId, jobId, log, isPrivileged, cancellationToken);
410          }, new Tuple<HiveTask, HiveTask>(child, hiveTask));
411          task.ContinueWith((x) => log.LogException(x.Exception), TaskContinuationOptions.OnlyOnFaulted);
412          tasks.Add(task);
413        }
414        taskUploadSemaphore.Release(); semaphoreReleased = true; // the semaphore has to be release before waitall!
415        TS.Task.WaitAll(tasks.ToArray());
416      }
417      finally {
418        if (!semaphoreReleased) taskUploadSemaphore.Release();
419      }
420    }
421    #endregion
422
423    #region Download Experiment
424    public static void LoadJob(RefreshableJob refreshableJob) {
425      var hiveExperiment = refreshableJob.Job;
426      refreshableJob.IsProgressing = true;
427      refreshableJob.Progress = new Progress();
428      TaskDownloader downloader = null;
429
430      try {
431        int totalJobCount = 0;
432        IEnumerable<LightweightTask> allTasks;
433
434        refreshableJob.Progress.Status = "Connecting to Server...";
435        // fetch all task objects to create the full tree of tree of HiveTask objects
436        refreshableJob.Progress.Status = "Downloading list of tasks...";
437        allTasks = HiveServiceLocator.Instance.CallHiveService(s => s.GetLightweightJobTasksWithoutStateLog(hiveExperiment.Id));
438        totalJobCount = allTasks.Count();
439
440        refreshableJob.Progress.Status = "Downloading tasks...";
441        downloader = new TaskDownloader(allTasks.Select(x => x.Id));
442        downloader.StartAsync();
443
444        while (!downloader.IsFinished) {
445          refreshableJob.Progress.ProgressValue = downloader.FinishedCount / (double)totalJobCount;
446          refreshableJob.Progress.Status = string.Format("Downloading/deserializing tasks... ({0}/{1} finished)", downloader.FinishedCount, totalJobCount);
447          Thread.Sleep(500);
448
449          if (downloader.IsFaulted) {
450            throw downloader.Exception;
451          }
452        }
453        IDictionary<Guid, HiveTask> allHiveTasks = downloader.Results;
454        var parents = allHiveTasks.Values.Where(x => !x.Task.ParentTaskId.HasValue);
455
456        refreshableJob.Progress.Status = "Downloading/deserializing complete. Displaying tasks...";
457        // build child-task tree
458        foreach (HiveTask hiveTask in parents) {
459          BuildHiveJobTree(hiveTask, allTasks, allHiveTasks);
460        }
461
462        refreshableJob.HiveTasks = new ItemCollection<HiveTask>(parents);
463        if (refreshableJob.IsFinished()) {
464          refreshableJob.ExecutionState = Core.ExecutionState.Stopped;
465        } else {
466          refreshableJob.ExecutionState = Core.ExecutionState.Started;
467        }
468        refreshableJob.OnLoaded();
469      }
470      finally {
471        refreshableJob.IsProgressing = false;
472        refreshableJob.Progress.Finish();
473        if (downloader != null) {
474          downloader.Dispose();
475        }
476      }
477    }
478
479    private static void BuildHiveJobTree(HiveTask parentHiveTask, IEnumerable<LightweightTask> allTasks, IDictionary<Guid, HiveTask> allHiveTasks) {
480      IEnumerable<LightweightTask> childTasks = from job in allTasks
481                                                where job.ParentTaskId.HasValue && job.ParentTaskId.Value == parentHiveTask.Task.Id
482                                                orderby job.DateCreated ascending
483                                                select job;
484      foreach (LightweightTask task in childTasks) {
485        HiveTask childHiveTask = allHiveTasks[task.Id];
486        BuildHiveJobTree(childHiveTask, allTasks, allHiveTasks);
487        parentHiveTask.AddChildHiveTask(childHiveTask);
488      }
489    }
490    #endregion
491
492    /// <summary>
493    /// Converts a string which can contain Ids separated by ';' to a enumerable
494    /// </summary>
495    private static IEnumerable<string> ToResourceNameList(string resourceNames) {
496      if (!string.IsNullOrEmpty(resourceNames)) {
497        return resourceNames.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
498      } else {
499        return new List<string>();
500      }
501    }
502
503    public static ItemTask LoadItemJob(Guid jobId) {
504      TaskData taskData = HiveServiceLocator.Instance.CallHiveService(s => s.GetTaskData(jobId));
505      try {
506        return PersistenceUtil.Deserialize<ItemTask>(taskData.Data);
507      }
508      catch {
509        return null;
510      }
511    }
512
513    /// <summary>
514    /// Executes the action. If it throws an exception it is repeated until repetition-count is reached.
515    /// If repetitions is -1, it is repeated infinitely.
516    /// </summary>
517    public static void TryAndRepeat(Action action, int repetitions, string errorMessage, ILog log = null) {
518      while (true) {
519        try { action(); return; }
520        catch (Exception e) {
521          if (repetitions == 0) throw new HiveException(errorMessage, e);
522          if (log != null) log.LogMessage(string.Format("{0}: {1} - will try again!", errorMessage, e.ToString()));
523          repetitions--;
524        }
525      }
526    }
527
528    public static HiveItemCollection<JobPermission> GetJobPermissions(Guid jobId) {
529      return HiveServiceLocator.Instance.CallHiveService((service) => {
530        IEnumerable<JobPermission> jps = service.GetJobPermissions(jobId);
531        foreach (var hep in jps) {
532          hep.UnmodifiedGrantedUserNameUpdate(service.GetUsernameByUserId(hep.GrantedUserId));
533        }
534        return new HiveItemCollection<JobPermission>(jps);
535      });
536    }
537  }
538}
Note: See TracBrowser for help on using the repository browser.