Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/SlaveJob.cs @ 6371

Last change on this file since 6371 was 6371, checked in by ascheibe, 13 years ago

#1233

  • code cleanups for slave review
  • added switch between privileged and unprivileged sandbox
  • removed childjob management because it's not used
File size: 8.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.IO;
24using System.Threading;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.PluginInfrastructure.Sandboxing;
28
29namespace HeuristicLab.Clients.Hive.SlaveCore {
30
31  /// <summary>
32  ///  Manages a single job and it's appdomain.
33  /// </summary>
34  public class SlaveJob : MarshalByRefObject {
35    private Executor executor;
36    private AppDomain appDomain;
37    private Semaphore waitForStartBeforeKillSem;
38    private bool executorMonitoringRun;
39    private Thread executorMonitoringThread;
40    private PluginManager pluginManager;
41    private ILog log;
42    public Guid JobId { get; private set; }
43    public bool IsPrepared { get; private set; }
44
45    private int coresNeeded;
46    public int CoresNeeded {
47      get { return coresNeeded; }
48      set { this.coresNeeded = value; }
49    }
50
51    public TimeSpan ExecutionTime {
52      get { return executor != null ? executor.ExecutionTime : TimeSpan.Zero; }
53    }
54
55    public SlaveJob(PluginManager pluginManager, int coresNeeded, ILog log) {
56      this.pluginManager = pluginManager;
57      this.coresNeeded = coresNeeded;
58      this.log = log;
59      waitForStartBeforeKillSem = new Semaphore(0, 1);
60      executorMonitoringRun = true;
61      IsPrepared = false;
62    }
63
64    public void StartJobAsync(Job job, JobData jobData) {
65      try {
66        this.JobId = job.Id;
67        Prepare(job);
68        StartJobInAppDomain(jobData);
69      }
70      catch (Exception) {
71        // make sure to clean up if something went wrong
72        DisposeAppDomain();
73        throw;
74      }
75    }
76
77    public void PauseJob() {
78      if (!IsPrepared) throw new AppDomainNotCreatedException();
79      if (!executor.IsPausing && !executor.IsStopping) executor.Pause();
80    }
81
82    public void StopJob() {
83      if (!IsPrepared) throw new AppDomainNotCreatedException();
84      if (!executor.IsPausing && !executor.IsStopping) executor.Stop();
85    }
86
87    private void Prepare(Job job) {
88      string pluginDir = Path.Combine(pluginManager.PluginTempBaseDir, job.Id.ToString());
89      string configFileName;
90      pluginManager.PreparePlugins(job, out configFileName);
91      appDomain = CreateAppDomain(job, pluginDir, configFileName);
92      IsPrepared = true;
93    }
94
95    private AppDomain CreateAppDomain(Job job, String pluginDir, string configFileName) {
96      if (job.IsPrivileged) {
97        appDomain = SandboxManager.CreateAndInitPrivilegedSandbox(job.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
98      } else {
99        appDomain = SandboxManager.CreateAndInitSandbox(job.Id.ToString(), pluginDir, Path.Combine(pluginDir, configFileName));
100      }
101      appDomain.UnhandledException += new UnhandledExceptionEventHandler(AppDomain_UnhandledException);
102
103      log.LogMessage("Creating AppDomain");
104      executor = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
105
106      executor.JobId = job.Id;
107      executor.CoresNeeded = job.CoresNeeded;
108      executor.MemoryNeeded = job.MemoryNeeded;
109      return appDomain;
110    }
111
112    private void StartJobInAppDomain(JobData jobData) {
113      executor.Start(jobData.Data);
114      waitForStartBeforeKillSem.Release();
115      StartExecutorMonitoringThread();
116    }
117
118    public void DisposeAppDomain() {
119      log.LogMessage(string.Format("Shutting down Appdomain for Job {0}", JobId));
120      StopExecutorMonitoringThread();
121
122      if (executor != null) {
123        executor.Dispose();
124      }
125
126      if (appDomain != null) {
127        appDomain.UnhandledException -= new UnhandledExceptionEventHandler(AppDomain_UnhandledException);
128        int repeat = 5;
129        while (repeat > 0) {
130          try {
131            waitForStartBeforeKillSem.WaitOne();
132            AppDomain.Unload(appDomain);
133            waitForStartBeforeKillSem.Dispose();
134            repeat = 0;
135          }
136          catch (CannotUnloadAppDomainException) {
137            log.LogMessage("Could not unload AppDomain, will try again in 1 sec.");
138            Thread.Sleep(1000);
139            repeat--;
140            if (repeat == 0) {
141              throw; // rethrow and let app crash
142            }
143          }
144        }
145      }
146      pluginManager.DeletePluginsForJob(JobId);
147      GC.Collect();
148    }
149
150    private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
151      DisposeAppDomain();
152      OnExceptionOccured(new Exception("Unhandled exception: " + e.ExceptionObject.ToString()));
153    }
154
155    public JobData GetJobData() {
156      return executor.GetJobData();
157    }
158
159    #region ExecutorMonitorThread
160    private void StartExecutorMonitoringThread() {
161      executorMonitoringThread = new Thread(MonitorExecutor);
162      executorMonitoringThread.Start();
163    }
164
165    private void StopExecutorMonitoringThread() {
166      if (executorMonitoringThread != null) {
167        if (executorMonitoringRun) {
168          executorMonitoringRun = false;
169          executor.ExecutorCommandQueue.AddMessage(ExecutorMessageType.StopExecutorMonitoringThread);
170        }
171      }
172    }
173
174    /// <summary>
175    /// Because the executor is in an appdomain and is not able to call back
176    /// (because of security -> lease time for marshall-by-ref object is 5 min),
177    /// we have to poll the executor for events we have to react to (e.g. job finished...)   
178    /// </summary>
179    private void MonitorExecutor() {
180      while (executorMonitoringRun) {
181        //this blocks through the appdomain border, that's why the lease gets renewed
182        ExecutorMessage message = executor.ExecutorCommandQueue.GetMessage();
183
184        switch (message.MessageType) {
185          case ExecutorMessageType.JobStarted:
186            OnJobStarted();
187            break;
188
189          case ExecutorMessageType.JobPaused:
190            executorMonitoringRun = false;
191            OnJobPaused();
192            DisposeAppDomain();
193            break;
194
195          case ExecutorMessageType.JobStopped:
196            executorMonitoringRun = false;
197            OnJobStopped();
198            DisposeAppDomain();
199            break;
200
201          case ExecutorMessageType.JobFailed:
202            executorMonitoringRun = false;
203            OnJobFailed(new JobFailedException(executor.CurrentException));
204            DisposeAppDomain();
205            break;
206
207          case ExecutorMessageType.StopExecutorMonitoringThread:
208            executorMonitoringRun = false;
209            return;
210        }
211      }
212    }
213    #endregion
214
215    public event EventHandler<EventArgs<Guid>> JobStarted;
216    private void OnJobStarted() {
217      var handler = JobStarted;
218      if (handler != null) handler(this, new EventArgs<Guid>(this.JobId));
219    }
220
221    public event EventHandler<EventArgs<Guid>> JobStopped;
222    private void OnJobStopped() {
223      var handler = JobStopped;
224      if (handler != null) handler(this, new EventArgs<Guid>(this.JobId));
225    }
226
227    public event EventHandler<EventArgs<Guid>> JobPaused;
228    private void OnJobPaused() {
229      var handler = JobPaused;
230      if (handler != null) handler(this, new EventArgs<Guid>(this.JobId));
231    }
232
233    public event EventHandler<EventArgs<Guid>> JobAborted;
234    private void OnJobAborted() {
235      var handler = JobAborted;
236      if (handler != null) handler(this, new EventArgs<Guid>(this.JobId));
237    }
238
239    public event EventHandler<EventArgs<Guid, Exception>> JobFailed;
240    private void OnJobFailed(Exception exception) {
241      var handler = JobFailed;
242      if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.JobId, exception));
243    }
244
245    public event EventHandler<EventArgs<Guid, Exception>> ExceptionOccured;
246    private void OnExceptionOccured(Exception exception) {
247      var handler = ExceptionOccured;
248      if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.JobId, exception));
249    }
250  }
251}
Note: See TracBrowser for help on using the repository browser.