Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/SlaveTask.cs @ 11082

Last change on this file since 11082 was 11082, checked in by ascheibe, 10 years ago

#2153

  • added a new method HandleStartStopPauseError in Executor to handle error conditions in the same way
  • added timeouts for semaphores so that failed tasks or tasks with endless loops don't block the slave
  • removed ExceptionOccured events from Executor/SlaveTask/TaskManager and use TaskFailed instead
  • removed another ExceptionOccured event in HeartbeatManager that was never used
File size: 9.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.IO;
24using System.Threading;
25using HeuristicLab.Clients.Hive.SlaveCore.Properties;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.PluginInfrastructure.Sandboxing;
29
30namespace HeuristicLab.Clients.Hive.SlaveCore {
31
32  /// <summary>
33  ///  Manages a single task and it's appdomain.
34  /// </summary>
35  public class SlaveTask : MarshalByRefObject {
36    private Executor executor;
37    private AppDomain appDomain;
38    private Semaphore waitForStartBeforeKillSem;
39    private bool executorMonitoringRun;
40    private Thread executorMonitoringThread;
41    private PluginManager pluginManager;
42    private ILog log;
43    public Guid TaskId { get; private set; }
44    public bool IsPrepared { get; private set; }
45    private TaskData originalTaskData;
46
47    private int coresNeeded;
48    public int CoresNeeded {
49      get { return coresNeeded; }
50      set { this.coresNeeded = value; }
51    }
52
53    public TimeSpan ExecutionTime {
54      get {
55        try {
56          return executor != null ? executor.ExecutionTime : TimeSpan.Zero;
57        }
58        catch (Exception ex) {
59          EventLogManager.LogException(ex);
60          return TimeSpan.Zero;
61        }
62      }
63    }
64
65    public SlaveTask(PluginManager pluginManager, int coresNeeded, ILog log) {
66      this.pluginManager = pluginManager;
67      this.coresNeeded = coresNeeded;
68      this.log = log;
69      waitForStartBeforeKillSem = new Semaphore(0, 1);
70      executorMonitoringRun = true;
71      IsPrepared = false;
72    }
73
74    public void StartJobAsync(Task task, TaskData taskData) {
75      try {
76        this.TaskId = task.Id;
77        originalTaskData = taskData;
78        Prepare(task);
79        StartTaskInAppDomain(taskData);
80      }
81      catch (Exception) {
82        // make sure to clean up if something went wrong
83        DisposeAppDomain();
84        throw;
85      }
86    }
87
88    public void PauseTask() {
89      if (!IsPrepared) throw new AppDomainNotCreatedException();
90      if (!executor.IsPausing && !executor.IsStopping) executor.Pause();
91    }
92
93    public void StopTask() {
94      if (!IsPrepared) throw new AppDomainNotCreatedException();
95      if (!executor.IsPausing && !executor.IsStopping) executor.Stop();
96    }
97
98    private void Prepare(Task task) {
99      string pluginDir = Path.Combine(pluginManager.PluginTempBaseDir, task.Id.ToString());
100      string configFileName;
101      pluginManager.PreparePlugins(task, out configFileName);
102      appDomain = CreateAppDomain(task, pluginDir, configFileName);
103      IsPrepared = true;
104    }
105
106    private AppDomain CreateAppDomain(Task task, String pluginDir, string configFileName) {
107      if (task.IsPrivileged) {
108        appDomain = SandboxManager.CreateAndInitPrivilegedSandbox(task.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
109      } else {
110        appDomain = SandboxManager.CreateAndInitSandbox(task.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
111      }
112      appDomain.UnhandledException += new UnhandledExceptionEventHandler(AppDomain_UnhandledException);
113
114      log.LogMessage("Creating AppDomain");
115      executor = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
116
117      executor.TaskId = task.Id;
118      executor.CoresNeeded = task.CoresNeeded;
119      executor.MemoryNeeded = task.MemoryNeeded;
120      return appDomain;
121    }
122
123    private void StartTaskInAppDomain(TaskData taskData) {
124      executor.Start(taskData.Data);
125      waitForStartBeforeKillSem.Release();
126      StartExecutorMonitoringThread();
127    }
128
129    public void DisposeAppDomain() {
130      log.LogMessage(string.Format("Shutting down Appdomain for Task {0}", TaskId));
131      StopExecutorMonitoringThread();
132
133      if (executor != null) {
134        try {
135          executor.Dispose();
136        }
137        catch (Exception ex) {
138          EventLogManager.LogException(ex);
139        }
140      }
141
142      if (appDomain != null) {
143        appDomain.UnhandledException -= new UnhandledExceptionEventHandler(AppDomain_UnhandledException);
144        int repeat = Settings.Default.PluginDeletionRetries;
145        while (repeat > 0) {
146          try {
147            waitForStartBeforeKillSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
148            AppDomain.Unload(appDomain);
149            waitForStartBeforeKillSem.Dispose();
150            repeat = 0;
151          }
152          catch (CannotUnloadAppDomainException) {
153            log.LogMessage("Could not unload AppDomain, will try again in 1 sec.");
154            Thread.Sleep(Settings.Default.PluginDeletionTimeout);
155            repeat--;
156            if (repeat == 0) {
157              throw; // rethrow and let app crash
158            }
159          }
160        }
161      }
162      pluginManager.DeletePluginsForJob(TaskId);
163      GC.Collect();
164    }
165
166    private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
167      DisposeAppDomain();
168      OnTaskFailed(new Exception("Unhandled exception: " + e.ExceptionObject.ToString()));
169    }
170
171    public TaskData GetTaskData() {
172      TaskData data = null;
173      try {
174        data = executor.GetTaskData();
175        //this means that there was a problem executing the task
176        if (data == null) return originalTaskData;
177      }
178      catch (Exception ex) {
179        EventLogManager.LogException(ex);
180      }
181      return data;
182    }
183
184    #region ExecutorMonitorThread
185    private void StartExecutorMonitoringThread() {
186      executorMonitoringThread = new Thread(MonitorExecutor);
187      executorMonitoringThread.Start();
188    }
189
190    private void StopExecutorMonitoringThread() {
191      if (executorMonitoringThread != null) {
192        if (executorMonitoringRun) {
193          executorMonitoringRun = false;
194          executor.ExecutorCommandQueue.AddMessage(ExecutorMessageType.StopExecutorMonitoringThread);
195        }
196      }
197    }
198
199    /// <summary>
200    /// Because the executor is in an appdomain and is not able to call back
201    /// (because of security -> lease time for marshall-by-ref object is 5 min),
202    /// we have to poll the executor for events we have to react to (e.g. task finished...)   
203    /// </summary>
204    private void MonitorExecutor() {
205      while (executorMonitoringRun) {
206        //this call goes through the appdomain border. We have to
207        //poll so that the the lease gets renewed
208        ExecutorMessage message;
209        do {
210          message = executor.ExecutorCommandQueue.GetMessage();
211        } while (message == null);
212
213        switch (message.MessageType) {
214          case ExecutorMessageType.TaskStarted:
215            OnTaskStarted();
216            break;
217
218          case ExecutorMessageType.TaskPaused:
219            executorMonitoringRun = false;
220            OnTaskPaused();
221            DisposeAppDomain();
222            break;
223
224          case ExecutorMessageType.TaskStopped:
225            executorMonitoringRun = false;
226            OnTaskStopped();
227            DisposeAppDomain();
228            break;
229
230          case ExecutorMessageType.TaskFailed:
231            executorMonitoringRun = false;
232            OnTaskFailed(new TaskFailedException(executor.CurrentExceptionStr));
233            DisposeAppDomain();
234            break;
235
236          case ExecutorMessageType.StopExecutorMonitoringThread:
237            executorMonitoringRun = false;
238            break;
239        }
240      }
241    }
242    #endregion
243
244    public event EventHandler<EventArgs<Guid>> TaskStarted;
245    private void OnTaskStarted() {
246      var handler = TaskStarted;
247      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
248    }
249
250    public event EventHandler<EventArgs<Guid>> TaskStopped;
251    private void OnTaskStopped() {
252      var handler = TaskStopped;
253      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
254    }
255
256    public event EventHandler<EventArgs<Guid>> TaskPaused;
257    private void OnTaskPaused() {
258      var handler = TaskPaused;
259      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
260    }
261
262    public event EventHandler<EventArgs<Guid>> TaskAborted;
263    private void OnTaskAborted() {
264      var handler = TaskAborted;
265      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
266    }
267
268    public event EventHandler<EventArgs<Guid, Exception>> TaskFailed;
269    private void OnTaskFailed(Exception exception) {
270      var handler = TaskFailed;
271      if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.TaskId, exception));
272    }
273  }
274}
Note: See TracBrowser for help on using the repository browser.