Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Executor.cs @ 11147

Last change on this file since 11147 was 11082, checked in by ascheibe, 10 years ago

#2153

  • added a new method HandleStartStopPauseError in Executor to handle error conditions in the same way
  • added timeouts for semaphores so that failed tasks or tasks with endless loops don't block the slave
  • removed ExceptionOccured events from Executor/SlaveTask/TaskManager and use TaskFailed instead
  • removed another ExceptionOccured event in HeartbeatManager that was never used
File size: 7.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Threading;
24using HeuristicLab.Clients.Hive.SlaveCore.Properties;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Hive;
28
29namespace HeuristicLab.Clients.Hive.SlaveCore {
30  /// <summary>
31  /// The executor runs in the appdomain and handles the execution of an Hive task.
32  /// </summary>
33  public class Executor : MarshalByRefObject, IDisposable {
34    private bool wasTaskAborted = false;
35    private AutoResetEvent pauseStopSem = new AutoResetEvent(false);
36    private AutoResetEvent startTaskSem = new AutoResetEvent(false); // block start method call
37    private AutoResetEvent taskStartedSem = new AutoResetEvent(false); // make pause or stop wait until start is finished
38    private ExecutorQueue executorQueue;
39    private bool taskDataInvalid = false; // if true, the jobdata is not sent when the task is failed
40    private ITask task;
41    private DateTime creationTime;
42
43    public Guid TaskId { get; set; }
44    public int CoresNeeded { get; set; }
45    public int MemoryNeeded { get; set; }
46    public bool IsStopping { get; set; }
47    public bool IsPausing { get; set; }
48
49    public Exception CurrentException;
50    public String CurrentExceptionStr {
51      get {
52        if (CurrentException != null) {
53          return CurrentException.ToString();
54        } else {
55          return string.Empty;
56        }
57      }
58    }
59
60    public ExecutorQueue ExecutorCommandQueue {
61      get { return executorQueue; }
62    }
63
64    private ExecutionState ExecutionState {
65      get { return task != null ? task.ExecutionState : HeuristicLab.Core.ExecutionState.Stopped; }
66    }
67
68    public TimeSpan ExecutionTime {
69      get { return task != null ? task.ExecutionTime : new TimeSpan(0, 0, 0); }
70    }
71
72    public Executor() {
73      IsStopping = false;
74      IsPausing = false;
75      executorQueue = new ExecutorQueue();
76    }
77
78    public void Start(byte[] serializedJob) {
79      try {
80        creationTime = DateTime.Now;
81        task = PersistenceUtil.Deserialize<ITask>(serializedJob);
82
83        RegisterJobEvents();
84
85        task.Start();
86        if (!startTaskSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
87          throw new TimeoutException("Timeout when starting the task. TaskStarted event was not fired.");
88        }
89      }
90      catch (Exception e) {
91        HandleStartStopPauseError(e);
92      }
93      finally {
94        taskStartedSem.Set();
95      }
96    }
97
98    public void Pause() {
99      IsPausing = true;
100      // wait until task is started. if this does not happen, the Task is null an we give up
101      taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
102      if (task == null) {
103        HandleStartStopPauseError(new Exception("Pausing task " + this.TaskId + ": Task is null"));
104        return;
105      }
106
107      if (task.ExecutionState == ExecutionState.Started) {
108        try {
109          task.Pause();
110          //we need to block the pause...
111          if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
112            throw new Exception("Pausing task " + this.TaskId + " timed out.");
113          }
114        }
115        catch (Exception ex) {
116          HandleStartStopPauseError(ex);
117        }
118      }
119    }
120
121    public void Stop() {
122      IsStopping = true;
123      // wait until task is started. if this does not happen, the Task is null an we give up
124      taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
125      wasTaskAborted = true;
126
127      if (task == null) {
128        HandleStartStopPauseError(new Exception("Stopping task " + this.TaskId + ": Task is null"));
129        return;
130      }
131
132      if ((ExecutionState == ExecutionState.Started) || (ExecutionState == ExecutionState.Paused)) {
133        try {
134          task.Stop();
135          if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
136            throw new Exception("Stopping task " + this.TaskId + " timed out.");
137          }
138        }
139        catch (Exception ex) {
140          HandleStartStopPauseError(ex);
141        }
142      }
143    }
144
145    private void RegisterJobEvents() {
146      task.TaskStopped += new EventHandler(Task_TaskStopped);
147      task.TaskFailed += new EventHandler(Task_TaskFailed);
148      task.TaskPaused += new EventHandler(Task_TaskPaused);
149      task.TaskStarted += new EventHandler(Task_TaskStarted);
150    }
151
152    private void DeregisterJobEvents() {
153      task.TaskStopped -= new EventHandler(Task_TaskStopped);
154      task.TaskFailed -= new EventHandler(Task_TaskFailed);
155      task.TaskPaused -= new EventHandler(Task_TaskPaused);
156      task.TaskStarted -= new EventHandler(Task_TaskStarted);
157    }
158
159    #region Task Events
160    private void Task_TaskFailed(object sender, EventArgs e) {
161      IsStopping = true;
162      EventArgs<Exception> ex = (EventArgs<Exception>)e;
163      CurrentException = ex.Value;
164      executorQueue.AddMessage(ExecutorMessageType.TaskFailed);
165    }
166
167    private void Task_TaskStopped(object sender, EventArgs e) {
168      IsStopping = true;
169      if (wasTaskAborted) {
170        pauseStopSem.Set();
171      }
172      executorQueue.AddMessage(ExecutorMessageType.TaskStopped);
173    }
174
175    private void Task_TaskPaused(object sender, EventArgs e) {
176      IsPausing = true;
177      pauseStopSem.Set();
178      executorQueue.AddMessage(ExecutorMessageType.TaskPaused);
179    }
180
181    private void Task_TaskStarted(object sender, EventArgs e) {
182      startTaskSem.Set();
183      executorQueue.AddMessage(ExecutorMessageType.TaskStarted);
184    }
185    #endregion
186
187    public TaskData GetTaskData() {
188      if (taskDataInvalid) return null;
189
190      if (task != null && task.ExecutionState == ExecutionState.Started) {
191        throw new InvalidStateException("Task is still running");
192      }
193
194      TaskData taskData = null;
195      if (task == null) {
196        if (CurrentException == null) {
197          CurrentException = new Exception("Task with id " + this.TaskId + " is null, sending empty task");
198        }
199      } else {
200        taskData = new TaskData();
201        taskData.Data = PersistenceUtil.Serialize(task);
202        taskData.TaskId = TaskId;
203      }
204      return taskData;
205    }
206
207    public void Dispose() {
208      if (task != null)
209        DeregisterJobEvents();
210      task = null;
211    }
212
213    private void HandleStartStopPauseError(Exception e) {
214      taskDataInvalid = true;
215      Task_TaskFailed(this, new EventArgs<Exception>(e));
216    }
217  }
218}
Note: See TracBrowser for help on using the repository browser.