Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Executor.cs @ 13329

Last change on this file since 13329 was 12920, checked in by jkarder, 9 years ago

#2468: implemented checkpointing

File size: 8.6 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Threading;
24using HeuristicLab.Clients.Hive.SlaveCore.Properties;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Hive;
28
namespace HeuristicLab.Clients.Hive.SlaveCore {
  /// <summary>
  /// The executor runs in the appdomain and handles the execution of a Hive task.
  /// </summary>
  public class Executor : MarshalByRefObject, IDisposable {
    // set by Stop(); Task_TaskStopped only signals pauseStopSem when this is true
    private bool wasTaskAborted = false;
    // signaled by Task_TaskPaused / Task_TaskStopped; Pause() and Stop() block on it
    private readonly AutoResetEvent pauseStopSem = new AutoResetEvent(false);
    private readonly AutoResetEvent startTaskSem = new AutoResetEvent(false); // block start method call
    private readonly AutoResetEvent taskStartedSem = new AutoResetEvent(false); // make pause or stop wait until start is finished
    private readonly ExecutorQueue executorQueue;
    private bool taskDataInvalid = false; // if true, the jobdata is not sent when the task is failed
    private ITask task;
    private DateTime creationTime;

    public Guid TaskId { get; set; }
    public int CoresNeeded { get; set; }
    public int MemoryNeeded { get; set; }
    public bool IsStopping { get; set; }
    public bool IsPausing { get; set; }

    /// <summary>
    /// The last exception recorded by this executor (set by the TaskFailed handler,
    /// by start/stop/pause errors and when task data is requested for a null task).
    /// </summary>
    public Exception CurrentException;

    /// <summary>
    /// String representation of <see cref="CurrentException"/>, or an empty string if none is set.
    /// </summary>
    public String CurrentExceptionStr {
      get {
        if (CurrentException != null) {
          return CurrentException.ToString();
        } else {
          return string.Empty;
        }
      }
    }

    /// <summary>
    /// Queue that receives an <see cref="ExecutorMessageType"/> for every task event handled by this executor.
    /// </summary>
    public ExecutorQueue ExecutorCommandQueue {
      get { return executorQueue; }
    }

    // execution state of the task; reported as Stopped while no task is loaded
    private ExecutionState ExecutionState {
      get { return task != null ? task.ExecutionState : HeuristicLab.Core.ExecutionState.Stopped; }
    }

    /// <summary>
    /// Execution time of the task, or zero while no task is loaded.
    /// </summary>
    public TimeSpan ExecutionTime {
      get { return task != null ? task.ExecutionTime : new TimeSpan(0, 0, 0); }
    }

    public Executor() {
      IsStopping = false;
      IsPausing = false;
      executorQueue = new ExecutorQueue();
    }

    /// <summary>
    /// Deserializes the task, subscribes to its events and starts it. Blocks until the
    /// TaskStarted event has fired or the configured timeout has elapsed. Errors are not
    /// thrown to the caller; they are routed to <see cref="HandleStartStopPauseError"/>.
    /// </summary>
    /// <param name="serializedJob">The serialized <see cref="ITask"/>.</param>
    public void Start(byte[] serializedJob) {
      try {
        creationTime = DateTime.Now;
        task = PersistenceUtil.Deserialize<ITask>(serializedJob);

        RegisterJobEvents();

        task.Start();
        if (!startTaskSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
          throw new TimeoutException("Timeout when starting the task. TaskStarted event was not fired.");
        }
      }
      catch (Exception e) {
        HandleStartStopPauseError(e);
      }
      finally {
        // release Pause()/Stop() regardless of whether starting succeeded
        taskStartedSem.Set();
      }
    }

    /// <summary>
    /// Pauses the task if it is currently started and blocks until the TaskPaused event
    /// has fired or the configured timeout has elapsed. Errors are not thrown to the
    /// caller; they are routed to <see cref="HandleStartStopPauseError"/>.
    /// </summary>
    public void Pause() {
      IsPausing = true;
      // wait until task is started. if this does not happen, the Task is null and we give up
      taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
      if (task == null) {
        HandleStartStopPauseError(new Exception("Pausing task " + this.TaskId + ": Task is null"));
        return;
      }

      if (task.ExecutionState == ExecutionState.Started) {
        try {
          task.Pause();
          //we need to block the pause...
          if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
            // same exception type as the start timeout in Start()
            throw new TimeoutException("Pausing task " + this.TaskId + " timed out.");
          }
        }
        catch (Exception ex) {
          HandleStartStopPauseError(ex);
        }
      }
    }

    /// <summary>
    /// Stops the task if it is started or paused and blocks until the TaskStopped event
    /// has fired or the configured timeout has elapsed. Errors are not thrown to the
    /// caller; they are routed to <see cref="HandleStartStopPauseError"/>.
    /// </summary>
    public void Stop() {
      IsStopping = true;
      // wait until task is started. if this does not happen, the Task is null and we give up
      taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
      // from now on Task_TaskStopped signals pauseStopSem
      wasTaskAborted = true;

      if (task == null) {
        HandleStartStopPauseError(new Exception("Stopping task " + this.TaskId + ": Task is null"));
        return;
      }

      if ((ExecutionState == ExecutionState.Started) || (ExecutionState == ExecutionState.Paused)) {
        try {
          task.Stop();
          if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
            // same exception type as the start timeout in Start()
            throw new TimeoutException("Stopping task " + this.TaskId + " timed out.");
          }
        }
        catch (Exception ex) {
          HandleStartStopPauseError(ex);
        }
      }
    }

    // subscribes the executor's handlers to all task events
    private void RegisterJobEvents() {
      task.TaskStopped += new EventHandler(Task_TaskStopped);
      task.TaskFailed += new EventHandler(Task_TaskFailed);
      task.TaskPaused += new EventHandler(Task_TaskPaused);
      task.TaskStarted += new EventHandler(Task_TaskStarted);
    }

    // removes the subscriptions made by RegisterJobEvents
    private void DeregisterJobEvents() {
      task.TaskStopped -= new EventHandler(Task_TaskStopped);
      task.TaskFailed -= new EventHandler(Task_TaskFailed);
      task.TaskPaused -= new EventHandler(Task_TaskPaused);
      task.TaskStarted -= new EventHandler(Task_TaskStarted);
    }

    #region Task Events
    private void Task_TaskFailed(object sender, EventArgs e) {
      IsStopping = true;
      // a failure raised without EventArgs<Exception> must still be reported to the queue,
      // so do not let a failing cast abort this handler
      var failure = e as EventArgs<Exception>;
      if (failure != null) {
        CurrentException = failure.Value;
      }
      executorQueue.AddMessage(ExecutorMessageType.TaskFailed);
    }

    private void Task_TaskStopped(object sender, EventArgs e) {
      IsStopping = true;
      // only a stop requested through Stop() has a waiter on pauseStopSem
      if (wasTaskAborted) {
        pauseStopSem.Set();
      }
      executorQueue.AddMessage(ExecutorMessageType.TaskStopped);
    }

    private void Task_TaskPaused(object sender, EventArgs e) {
      IsPausing = true;
      pauseStopSem.Set();
      executorQueue.AddMessage(ExecutorMessageType.TaskPaused);
    }

    private void Task_TaskStarted(object sender, EventArgs e) {
      startTaskSem.Set();
      executorQueue.AddMessage(ExecutorMessageType.TaskStarted);
    }
    #endregion

    /// <summary>
    /// Creates a checkpoint of the task: pauses it, serializes it and starts it again.
    /// While doing so the regular TaskPaused/TaskStarted handlers are temporarily replaced,
    /// so the intermediate pause and restart are not reported to the executor queue.
    /// </summary>
    /// <returns>
    /// The serialized task together with the time at which it was serialized, or
    /// <c>null</c> if the task data is invalid or no task is loaded.
    /// </returns>
    public Tuple<TaskData, DateTime> GetTaskDataSnapshot() {
      if (taskDataInvalid) return null;

      if (task == null) {
        if (CurrentException == null) {
          CurrentException = new Exception("Task with id " + this.TaskId + " is null, sending empty task");
        }
        return null;
      }

      var taskData = new TaskData();

      // the wait handle is only needed for this call, so release it deterministically
      using (var pausedTrigger = new EventWaitHandle(false, EventResetMode.ManualReset)) {
        EventHandler pausedHandler = null;
        pausedHandler = (s, e) => {
          // one-shot handler: put the regular handler back, then wake up the waiting caller
          task.TaskPaused -= pausedHandler;
          task.TaskPaused += Task_TaskPaused;
          pausedTrigger.Set();
        };

        task.TaskPaused -= Task_TaskPaused;
        task.TaskPaused += pausedHandler;
        try {
          task.Pause();
          // NOTE(review): waits without timeout; this blocks for as long as the task does not raise TaskPaused
          pausedTrigger.WaitOne();
        }
        catch {
          // pausing failed: make sure the regular handler is subscribed exactly once again,
          // otherwise later pauses would never be reported
          task.TaskPaused -= pausedHandler;
          task.TaskPaused -= Task_TaskPaused;
          task.TaskPaused += Task_TaskPaused;
          throw;
        }
      }

      // NOTE(review): if serialization throws, the exception propagates and the task is not started again
      taskData.Data = PersistenceUtil.Serialize(task);
      var timestamp = DateTime.Now;

      EventHandler startedHandler = null;
      startedHandler = (s, e) => {
        // one-shot handler: swallow the restart notification and put the regular handler back
        task.TaskStarted -= startedHandler;
        task.TaskStarted += Task_TaskStarted;
      };

      task.TaskStarted -= Task_TaskStarted;
      task.TaskStarted += startedHandler;
      try {
        task.Start();
      }
      catch {
        // restarting failed: make sure the regular handler is subscribed exactly once again
        task.TaskStarted -= startedHandler;
        task.TaskStarted -= Task_TaskStarted;
        task.TaskStarted += Task_TaskStarted;
        throw;
      }

      taskData.TaskId = TaskId;
      return Tuple.Create(taskData, timestamp);
    }

    /// <summary>
    /// Serializes the task. Must not be called while the task is running.
    /// </summary>
    /// <returns>
    /// The serialized task, or <c>null</c> if the task data is invalid or no task is loaded.
    /// </returns>
    /// <exception cref="InvalidStateException">The task is in the Started state.</exception>
    public TaskData GetTaskData() {
      if (taskDataInvalid) return null;

      if (task != null && task.ExecutionState == ExecutionState.Started) {
        throw new InvalidStateException("Task is still running");
      }

      TaskData taskData = null;
      if (task == null) {
        if (CurrentException == null) {
          CurrentException = new Exception("Task with id " + this.TaskId + " is null, sending empty task");
        }
      } else {
        taskData = new TaskData();
        taskData.Data = PersistenceUtil.Serialize(task);
        taskData.TaskId = TaskId;
      }
      return taskData;
    }

    /// <summary>
    /// Unsubscribes from the task's events and drops the reference to the task.
    /// </summary>
    public void Dispose() {
      if (task != null)
        DeregisterJobEvents();
      task = null;
    }

    // marks the task data as invalid and reports the error through the regular TaskFailed path
    private void HandleStartStopPauseError(Exception e) {
      taskDataInvalid = true;
      Task_TaskFailed(this, new EventArgs<Exception>(e));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.