Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.3/SlaveTask.cs @ 6945

Last change on this file since 6945 was 6945, checked in by ascheibe, 13 years ago

#1233 slave: catch more errors and log them to the Windows event log

File size: 9.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.IO;
24using System.Threading;
25using HeuristicLab.Clients.Hive.SlaveCore.Properties;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.PluginInfrastructure.Sandboxing;
29
namespace HeuristicLab.Clients.Hive.SlaveCore {

  /// <summary>
  ///  Manages a single task and its appdomain.
  /// </summary>
  public class SlaveTask : MarshalByRefObject {
    private Executor executor;
    private AppDomain appDomain;
    // Released once the start attempt in StartJobAsync is over (successful or not);
    // DisposeAppDomain waits on it before unloading the AppDomain.
    private Semaphore waitForStartBeforeKillSem;
    private bool executorMonitoringRun;
    private Thread executorMonitoringThread;
    private PluginManager pluginManager;
    private ILog log;
    public Guid TaskId { get; private set; }
    public bool IsPrepared { get; private set; }

    private int coresNeeded;
    public int CoresNeeded {
      get { return coresNeeded; }
      set { this.coresNeeded = value; }
    }

    /// <summary>
    /// Execution time reported by the executor. Returns TimeSpan.Zero if no executor
    /// exists yet or if querying the executor fails (the exception is logged).
    /// </summary>
    public TimeSpan ExecutionTime {
      get {
        try {
          return executor != null ? executor.ExecutionTime : TimeSpan.Zero;
        }
        catch (Exception ex) {
          EventLogManager.LogException(ex);
          return TimeSpan.Zero;
        }
      }
    }

    public SlaveTask(PluginManager pluginManager, int coresNeeded, ILog log) {
      this.pluginManager = pluginManager;
      this.coresNeeded = coresNeeded;
      this.log = log;
      waitForStartBeforeKillSem = new Semaphore(0, 1);
      executorMonitoringRun = true;
      IsPrepared = false;
    }

    /// <summary>
    /// Prepares plugins and AppDomain for the task, starts the executor and then the
    /// monitoring thread. If anything fails, the AppDomain is disposed and the
    /// exception is rethrown.
    /// </summary>
    public void StartJobAsync(Task task, TaskData taskData) {
      try {
        this.TaskId = task.Id;
        try {
          Prepare(task);
          StartTaskInAppDomain(taskData);
        }
        finally {
          // The start attempt is over, whether it succeeded or not. Signal this in any
          // case, otherwise DisposeAppDomain (called below on failure) would wait
          // forever for a start that will never complete.
          waitForStartBeforeKillSem.Release();
        }
        StartExecutorMonitoringThread();
      }
      catch (Exception) {
        // make sure to clean up if something went wrong
        DisposeAppDomain();
        throw;
      }
    }

    /// <summary>
    /// Pauses the executor unless it is already pausing or stopping.
    /// </summary>
    /// <exception cref="AppDomainNotCreatedException">The task has not been prepared.</exception>
    public void PauseTask() {
      if (!IsPrepared) throw new AppDomainNotCreatedException();
      if (!executor.IsPausing && !executor.IsStopping) executor.Pause();
    }

    /// <summary>
    /// Stops the executor unless it is already pausing or stopping.
    /// </summary>
    /// <exception cref="AppDomainNotCreatedException">The task has not been prepared.</exception>
    public void StopTask() {
      if (!IsPrepared) throw new AppDomainNotCreatedException();
      if (!executor.IsPausing && !executor.IsStopping) executor.Stop();
    }

    // Prepares the plugins for the task and creates the AppDomain and the executor.
    private void Prepare(Task task) {
      string pluginDir = Path.Combine(pluginManager.PluginTempBaseDir, task.Id.ToString());
      string configFileName;
      pluginManager.PreparePlugins(task, out configFileName);
      appDomain = CreateAppDomain(task, pluginDir, configFileName);
      IsPrepared = true;
    }

    // Creates a (privileged or regular) sandbox AppDomain for the task and
    // instantiates the executor inside of it.
    private AppDomain CreateAppDomain(Task task, String pluginDir, string configFileName) {
      if (task.IsPrivileged) {
        appDomain = SandboxManager.CreateAndInitPrivilegedSandbox(task.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
      } else {
        appDomain = SandboxManager.CreateAndInitSandbox(task.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
      }
      appDomain.UnhandledException += new UnhandledExceptionEventHandler(AppDomain_UnhandledException);

      log.LogMessage("Creating AppDomain");
      executor = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);

      executor.TaskId = task.Id;
      executor.CoresNeeded = task.CoresNeeded;
      executor.MemoryNeeded = task.MemoryNeeded;
      return appDomain;
    }

    // Hands the task data to the executor. Signaling the end of the start attempt
    // and starting the monitoring thread is done by StartJobAsync.
    private void StartTaskInAppDomain(TaskData taskData) {
      executor.Start(taskData.Data);
    }

    /// <summary>
    /// Stops the monitoring thread, disposes the executor, unloads the AppDomain
    /// (retrying as configured in the settings) and deletes the plugins of the task.
    /// Calling it again after the AppDomain has been unloaded skips the unload step.
    /// </summary>
    public void DisposeAppDomain() {
      log.LogMessage(string.Format("Shutting down Appdomain for Task {0}", TaskId));
      StopExecutorMonitoringThread();

      if (executor != null) {
        try {
          executor.Dispose();
        }
        catch (Exception ex) {
          EventLogManager.LogException(ex);
        }
      }

      if (appDomain != null) {
        appDomain.UnhandledException -= new UnhandledExceptionEventHandler(AppDomain_UnhandledException);
        int repeat = Settings.Default.PluginDeletionRetries;
        bool startFinished = false;
        while (repeat > 0) {
          try {
            // Wait only once: the semaphore count is consumed by the first wait, so
            // waiting again on a retry would block forever.
            if (!startFinished) {
              waitForStartBeforeKillSem.WaitOne();
              startFinished = true;
            }
            AppDomain.Unload(appDomain);
            waitForStartBeforeKillSem.Dispose();
            // remember that the domain is gone so that a repeated call does not touch
            // the disposed semaphore or try to unload the domain a second time
            appDomain = null;
            repeat = 0;
          }
          catch (CannotUnloadAppDomainException) {
            log.LogMessage("Could not unload AppDomain, will try again in 1 sec.");
            Thread.Sleep(Settings.Default.PluginDeletionTimeout);
            repeat--;
            if (repeat == 0) {
              throw; // rethrow and let app crash
            }
          }
        }
      }
      pluginManager.DeletePluginsForJob(TaskId);
      GC.Collect();
    }

    private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
      DisposeAppDomain();
      OnExceptionOccured(new Exception("Unhandled exception: " + e.ExceptionObject.ToString()));
    }

    /// <summary>
    /// Fetches the task data from the executor. Returns null if this fails
    /// (the exception is logged).
    /// </summary>
    public TaskData GetTaskData() {
      TaskData data = null;
      try {
        data = executor.GetTaskData();
      }
      catch (Exception ex) {
        EventLogManager.LogException(ex);
      }
      return data;
    }

    #region ExecutorMonitorThread
    private void StartExecutorMonitoringThread() {
      executorMonitoringThread = new Thread(MonitorExecutor);
      executorMonitoringThread.Start();
    }

    // Asks the monitoring thread to end by clearing the run flag and enqueueing a
    // stop message, so that a blocking GetMessage call in MonitorExecutor returns.
    private void StopExecutorMonitoringThread() {
      if (executorMonitoringThread != null) {
        if (executorMonitoringRun) {
          executorMonitoringRun = false;
          executor.ExecutorCommandQueue.AddMessage(ExecutorMessageType.StopExecutorMonitoringThread);
        }
      }
    }

    /// <summary>
    /// Because the executor is in an appdomain and is not able to call back
    /// (because of security -> lease time for marshall-by-ref object is 5 min),
    /// we have to poll the executor for events we have to react to (e.g. task finished...)   
    /// </summary>
    private void MonitorExecutor() {
      while (executorMonitoringRun) {
        //this blocks through the appdomain border, that's why the lease gets renewed
        ExecutorMessage message = executor.ExecutorCommandQueue.GetMessage();

        switch (message.MessageType) {
          case ExecutorMessageType.TaskStarted:
            OnTaskStarted();
            break;

          case ExecutorMessageType.TaskPaused:
            executorMonitoringRun = false;
            OnTaskPaused();
            DisposeAppDomain();
            break;

          case ExecutorMessageType.TaskStopped:
            executorMonitoringRun = false;
            OnTaskStopped();
            DisposeAppDomain();
            break;

          case ExecutorMessageType.TaskFailed:
            executorMonitoringRun = false;
            OnTaskFailed(new TaskFailedException(executor.CurrentExceptionStr));
            DisposeAppDomain();
            break;

          case ExecutorMessageType.StopExecutorMonitoringThread:
            executorMonitoringRun = false;
            break;

          case ExecutorMessageType.ExceptionOccured:
            executorMonitoringRun = false;
            // Fetch the exception BEFORE the AppDomain is unloaded: afterwards the
            // executor proxy points into a domain that no longer exists.
            Exception executorException = null;
            try {
              executorException = executor.CurrentException;
            }
            catch (Exception ex) {
              EventLogManager.LogException(ex);
            }
            DisposeAppDomain();
            if (executorException != null) {
              OnExceptionOccured(executorException);
            } else {
              OnExceptionOccured(new Exception(string.Format("Unknow exception occured in Executor for task {0}", TaskId)));
            }
            break;
        }
      }
    }
    #endregion

    public event EventHandler<EventArgs<Guid>> TaskStarted;
    private void OnTaskStarted() {
      var handler = TaskStarted;
      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
    }

    public event EventHandler<EventArgs<Guid>> TaskStopped;
    private void OnTaskStopped() {
      var handler = TaskStopped;
      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
    }

    public event EventHandler<EventArgs<Guid>> TaskPaused;
    private void OnTaskPaused() {
      var handler = TaskPaused;
      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
    }

    public event EventHandler<EventArgs<Guid>> TaskAborted;
    private void OnTaskAborted() {
      var handler = TaskAborted;
      if (handler != null) handler(this, new EventArgs<Guid>(this.TaskId));
    }

    public event EventHandler<EventArgs<Guid, Exception>> TaskFailed;
    private void OnTaskFailed(Exception exception) {
      var handler = TaskFailed;
      if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.TaskId, exception));
    }

    public event EventHandler<EventArgs<Guid, Exception>> ExceptionOccured;
    private void OnExceptionOccured(Exception exception) {
      var handler = ExceptionOccured;
      if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.TaskId, exception));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.