Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Hive.Client.Core/3.2/Core.cs @ 2502

Last change on this file since 2502 was 2117, checked in by svonolfe, 15 years ago

Streaming of Jobs and JobsResults directly from/to the DB (#680)

File size: 17.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Hive.Client.ExecutionEngine;
27using HeuristicLab.Hive.Client.Common;
28using System.Threading;
29using System.Reflection;
30using System.Diagnostics;
31using System.Security.Permissions;
32using System.Security.Policy;
33using System.Security;
34using HeuristicLab.Hive.Client.Communication;
35using HeuristicLab.Hive.Contracts.BusinessObjects;
36using HeuristicLab.Hive.Contracts;
37using System.Runtime.Remoting.Messaging;
38using HeuristicLab.PluginInfrastructure;
39using System.ServiceModel;
40using System.ServiceModel.Description;
41using HeuristicLab.Hive.Client.Core.ClientConsoleService;
42using HeuristicLab.Hive.Client.Core.ConfigurationManager;
43using HeuristicLab.Hive.Client.Communication.ServerService;
44using HeuristicLab.Hive.JobBase;
45using HeuristicLab.Hive.Client.Core.JobStorage;
46
namespace HeuristicLab.Hive.Client.Core {
  /// <summary>
  /// The core component of the Hive Client.
  /// It wires up the WCF service events, starts the heartbeat and then processes the
  /// messages of the <see cref="MessageQueue"/> until a shutdown is requested.
  /// For every job received from the server an own AppDomain with an <see cref="Executor"/> is created.
  /// </summary>
  /// <remarks>
  /// The three dictionaries (engines, appDomains, jobs) are always modified together while
  /// holding the monitor of <c>engines</c>. Calls into an <see cref="Executor"/> are made
  /// outside of that lock wherever this is possible.
  /// </remarks>
  public class Core : MarshalByRefObject {
    /// <summary>Address on which the client console server is started.</summary>
    private const string ClientConsoleAddress = "net.tcp://127.0.0.1:8000/ClientConsole/";

    /// <summary>Value assigned to Heartbeat.Interval (presumably milliseconds - TODO confirm against Heartbeat).</summary>
    private const int HeartbeatInterval = 10000;

    /// <summary>
    /// Set to true by the Shutdown message; the main loop in <see cref="Start"/> ends once it is true.
    /// </summary>
    public static bool abortRequested { get; set; }

    // Written by the message loop and by the WcfService event handlers. Those handlers may run on
    // other threads (assumption - depends on WcfService), hence volatile.
    private volatile bool currentlyFetching = false;

    private readonly Dictionary<Guid, Executor> engines = new Dictionary<Guid, Executor>();
    private readonly Dictionary<Guid, AppDomain> appDomains = new Dictionary<Guid, AppDomain>();
    private readonly Dictionary<Guid, Job> jobs = new Dictionary<Guid, Job>();

    private WcfService wcfService;
    private Heartbeat beat;

    /// <summary>
    /// Main Method for the client. Initializes the plugin manager, the client console server,
    /// the WCF service and the heartbeat and then blocks in the message loop until
    /// <see cref="abortRequested"/> is set.
    /// </summary>
    public void Start() {
      abortRequested = false;
      PluginManager.Manager.Initialize();
      Logging.Instance.Info(this.ToString(), "Hive Client started");
      ClientConsoleServer server = new ClientConsoleServer();
      server.StartClientConsoleServer(new Uri(ClientConsoleAddress));

      ConfigManager manager = ConfigManager.Instance;
      manager.Core = this;

      //Register all Wcf Service references
      wcfService = WcfService.Instance;
      wcfService.LoginCompleted += new EventHandler<LoginCompletedEventArgs>(wcfService_LoginCompleted);
      wcfService.SendJobCompleted += new EventHandler<SendJobCompletedEventArgs>(wcfService_SendJobCompleted);
      wcfService.StoreFinishedJobResultCompleted += new EventHandler<StoreFinishedJobResultCompletedEventArgs>(wcfService_StoreFinishedJobResultCompleted);
      wcfService.ProcessSnapshotCompleted += new EventHandler<ProcessSnapshotCompletedEventArgs>(wcfService_ProcessSnapshotCompleted);
      wcfService.ConnectionRestored += new EventHandler(wcfService_ConnectionRestored);
      wcfService.ServerChanged += new EventHandler(wcfService_ServerChanged);
      wcfService.Connected += new EventHandler(wcfService_Connected);

      //Recover Server IP and Port from the Settings Framework
      ConnectionContainer cc = ConfigManager.Instance.GetServerIPAndPort();
      if (cc.IPAdress != String.Empty && cc.Port != 0)
        wcfService.SetIPAndPort(cc.IPAdress, cc.Port);

      if (UptimeManager.Instance.isOnline())
        wcfService.Connect();

      //Initialize the heartbeat
      beat = new Heartbeat { Interval = HeartbeatInterval };
      beat.StartHeartbeat();

      MessageQueue queue = MessageQueue.GetInstance();

      //Main processing loop
      //Todo: own thread for message handling
      while (!abortRequested) {
        MessageContainer container = queue.GetMessage();
        Debug.WriteLine("Main loop received this message: " + container.Message.ToString());
        Logging.Instance.Info(this.ToString(), container.Message.ToString());
        DetermineAction(container);
      }
      Console.WriteLine("ended!");
    }

    /// <summary>
    /// Reads and analyzes the Messages from the MessageQueue and starts corresponding actions.
    /// Message types without a case are ignored.
    /// </summary>
    /// <param name="container">The Container, containing the message</param>
    private void DetermineAction(MessageContainer container) {
      Executor engine;
      switch (container.Message) {
        //Server requests to abort a job
        case MessageContainer.MessageType.AbortJob:
          if (TryGetEngine(container.JobId, out engine))
            engine.Abort();
          else
            Logging.Instance.Error(this.ToString(), "AbortJob: Engine doesn't exist");
          break;

        //Job has been successfully aborted
        case MessageContainer.MessageType.JobAborted:
          HandleJobAborted(container.JobId);
          break;

        //Request a Snapshot from the Execution Engine
        case MessageContainer.MessageType.RequestSnapshot:
          if (TryGetEngine(container.JobId, out engine))
            engine.RequestSnapshot();
          else
            Logging.Instance.Error(this.ToString(), "RequestSnapshot: Engine doesn't exist");
          break;

        //Snapshot is ready and can be sent back to the Server
        case MessageContainer.MessageType.SnapshotReady:
          ThreadPool.QueueUserWorkItem(new WaitCallback(GetSnapshot), container.JobId);
          break;

        //Pull a Job from the Server
        case MessageContainer.MessageType.FetchJob:
          HandleFetchJob();
          break;

        //A Job has finished and can be sent back to the server
        case MessageContainer.MessageType.FinishedJob:
          ThreadPool.QueueUserWorkItem(new WaitCallback(GetFinishedJob), container.JobId);
          break;

        //When the timeslice is up
        case MessageContainer.MessageType.UptimeLimitDisconnect:
          HandleUptimeLimitDisconnect();
          break;

        //Hard shutdown of the client
        case MessageContainer.MessageType.Shutdown:
          HandleShutdown();
          break;
      }
    }

    //Handlers for the individual message types
    #region Message handlers

    /// <summary>
    /// Removes an aborted job together with its AppDomain.
    /// </summary>
    /// <param name="jobId">the GUID of the aborted job</param>
    private void HandleJobAborted(Guid jobId) {
      //todo: thread this
      Debug.WriteLine("Job aborted, he's dead");
      bool engineExisted;
      lock (engines) {
        engineExisted = engines.ContainsKey(jobId);
        if (engineExisted)
          ReleaseJob(jobId);
      }
      if (engineExisted)
        GC.Collect();  // kept from the original code: collect right after the AppDomain has been unloaded
      else
        Logging.Instance.Error(this.ToString(), "JobAbort: Engine doesn't exist");
    }

    /// <summary>
    /// Asks the server for a new job unless a request is already pending.
    /// </summary>
    private void HandleFetchJob() {
      if (currentlyFetching)
        return;

      // The flag has to be set BEFORE the asynchronous call is issued: if the completion handler
      // ran first it would reset the flag, and setting it afterwards would block fetching forever.
      currentlyFetching = true;
      try {
        wcfService.SendJobAsync(ConfigManager.Instance.GetClientInfo().Id);
      }
      catch {
        currentlyFetching = false;  // no request is pending, allow the next FetchJob message to retry
        throw;
      }
    }

    /// <summary>
    /// Requests a snapshot of every running job, or disconnects right away if nothing is running.
    /// </summary>
    private void HandleUptimeLimitDisconnect() {
      Logging.Instance.Info(this.ToString(), "Uptime Limit reached, storing jobs and sending them back");

      // iterate over a copy: other threads add/remove engines while the snapshots are requested
      List<Executor> running = SnapshotEngines();
      if (running.Count > 0) {
        //make sure there is no more fetching of jobs while the snapshots get processed
        currentlyFetching = true;
        //request a snapshot of each running job
        foreach (Executor engine in running)
          engine.RequestSnapshot();
      } else {
        //Disconnect afterwards
        WcfService.Instance.Disconnect();
      }
    }

    /// <summary>
    /// Unloads all AppDomains, ends the main loop, stops the heartbeat and logs out.
    /// </summary>
    private void HandleShutdown() {
      lock (engines) {
        // UnloadAppDomain logs failures instead of throwing, so one bad domain can not prevent the shutdown
        foreach (AppDomain domain in appDomains.Values)
          UnloadAppDomain(domain);
        // the executors live in the unloaded domains - do not hand out dead proxies anymore
        appDomains.Clear();
        engines.Clear();
        jobs.Clear();
      }
      abortRequested = true;
      beat.StopHeartBeat();
      WcfService.Instance.Logout(ConfigManager.Instance.GetClientInfo().Id);
    }

    #endregion

    //Asynchronous Threads for interaction with the Execution Engine
    #region Async Threads for the EE

    /// <summary>
    /// Fetches the serialized finished job from its executor and submits it to the server.
    /// If the client is not logged in, the job gets stored on the disk and its AppDomain is released;
    /// stored jobs are submitted by <see cref="wcfService_Connected"/>.
    /// </summary>
    /// <param name="jobId">the boxed GUID of the job</param>
    private void GetFinishedJob(object jobId) {
      Guid jId = (Guid)jobId;
      try {
        Executor engine;
        if (!TryGetEngine(jId, out engine)) {
          Logging.Instance.Error(this.ToString(), "GetFinishedJob: Engine doesn't exist");
          return;
        }

        byte[] sJob = engine.GetFinishedJob();

        if (WcfService.Instance.ConnState == NetworkEnum.WcfConnState.Loggedin) {
          wcfService.StoreFinishedJobResultAsync(ConfigManager.Instance.GetClientInfo().Id,
            jId,
            sJob,
            1,
            null,
            true);
        } else {
          JobStorageManager.PersistObjectToDisc(wcfService.ServerIP, wcfService.ServerPort, jId, sJob);
          ReleaseJob(jId);
        }
      }
      catch (InvalidStateException ise) {
        Logging.Instance.Error(this.ToString(), "Exception: ", ise);
      }
    }

    /// <summary>
    /// Fetches a snapshot from the executor of the job and sends it to the server.
    /// Afterwards the job is either destroyed (uptime limit reached) or continued.
    /// </summary>
    /// <param name="jobId">the boxed GUID of the job</param>
    private void GetSnapshot(object jobId) {
      Guid jId = (Guid)jobId;
      Executor engine;
      // the job may have been finished/aborted between queueing and execution of this work item
      if (!TryGetEngine(jId, out engine)) {
        Logging.Instance.Error(this.ToString(), "GetSnapshot: Engine doesn't exist");
        return;
      }

      byte[] obj = engine.GetSnapshot();
      wcfService.ProcessSnapshotSync(ConfigManager.Instance.GetClientInfo().Id,
        jId,
        obj,
        engine.Progress,
        null);

      //Uptime Limit reached, now is a good time to destroy this jobs.
      if (!UptimeManager.Instance.isOnline()) {
        KillAppDomain(jId);
        //Still anything running?
        bool idle;
        lock (engines) {
          idle = engines.Count == 0;
        }
        if (idle)
          WcfService.Instance.Disconnect();
      } else {
        engine.StartOnlyJob();
      }
    }

    #endregion

    //Eventhandlers for the communication with the wcf Layer
    #region wcfService Events
    /// <summary>
    /// Login has returned. A successful login re-enables the fetching of jobs.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_LoginCompleted(object sender, LoginCompletedEventArgs e) {
      if (e.Result.Success) {
        currentlyFetching = false;
        Logging.Instance.Info(this.ToString(), "Login completed to Hive Server @ " + DateTime.Now);
      } else
        Logging.Instance.Error(this.ToString(), e.Result.StatusMessage);
    }

    /// <summary>
    /// A new Job from the wcfService has been received and will be started within a AppDomain.
    /// Fetching is re-enabled when the handler is left, no matter whether the job could be started.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_SendJobCompleted(object sender, SendJobCompletedEventArgs e) {
      try {
        if (e.Result.StatusMessage == ApplicationConstants.RESPONSE_COMMUNICATOR_NO_JOBS_LEFT)
          return;

        Job job = e.Result.Job;
        bool sandboxed = false;
        List<byte[]> files = new List<byte[]>();
        foreach (CachedHivePluginInfo plugininfo in PluginCache.Instance.GetPlugins(job.PluginsNeeded))
          files.AddRange(plugininfo.PluginFiles);

        AppDomain appDomain = PluginManager.Manager.CreateAndInitAppDomainWithSandbox(job.Id.ToString(), sandboxed, null, files);
        appDomain.UnhandledException += new UnhandledExceptionEventHandler(appDomain_UnhandledException);
        lock (engines) {
          if (jobs.ContainsKey(job.Id)) {
            // the job is already known: the freshly created domain would never be unloaded otherwise
            Logging.Instance.Info(this.ToString(), "SendJob: Job " + job.Id + " is already registered, discarding the duplicate");
            UnloadAppDomain(appDomain);
            return;
          }

          jobs.Add(job.Id, job);
          appDomains.Add(job.Id, appDomain);
          try {
            Executor engine = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
            engine.JobId = job.Id;
            engine.Queue = MessageQueue.GetInstance();
            engine.Start(e.Data);
            engines.Add(job.Id, engine);
          }
          catch {
            // do not keep a job/AppDomain entry that has no engine
            ReleaseJob(job.Id);
            throw;
          }

          ClientStatusInfo.JobsFetched++;

          Debug.WriteLine("Increment FetchedJobs to:" + ClientStatusInfo.JobsFetched);
        }
      }
      finally {
        currentlyFetching = false;
      }
    }

    /// <summary>
    /// A finished job has been stored on the server. The AppDomain of the job is destroyed in any case.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_StoreFinishedJobResultCompleted(object sender, StoreFinishedJobResultCompletedEventArgs e) {
      KillAppDomain(e.Result.JobId);
      if (e.Result.Success) {
        ClientStatusInfo.JobsProcessed++;
        Debug.WriteLine("ProcessedJobs to:" + ClientStatusInfo.JobsProcessed);
      } else {
        Logging.Instance.Error(this.ToString(), "Sending of job " + e.Result.JobId + " failed, job has been wasted. Message: " + e.Result.StatusMessage);
      }
    }

    /// <summary>
    /// A snapshot has been stored on the server
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ProcessSnapshotCompleted(object sender, ProcessSnapshotCompletedEventArgs e) {
      Logging.Instance.Info(this.ToString(), "Snapshot " + e.Result.JobId + " has been transmitted according to plan.");
    }

    /// <summary>
    /// The server has been changed. All running engines are asked to abort.
    /// The AppDomains are not unloaded here; that happens when the JobAborted message is processed.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ServerChanged(object sender, EventArgs e) {
      Logging.Instance.Info(this.ToString(), "ServerChanged has been called");
      // abort on a copy so that the dictionary can not change underneath the enumeration
      foreach (Executor engine in SnapshotEngines())
        engine.Abort();
    }

    /// <summary>
    /// Connection to the server has been established => Login and send the persisted Jobs from the harddisk.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_Connected(object sender, EventArgs e) {
      wcfService.LoginSync(ConfigManager.Instance.GetClientInfo());
      JobStorageManager.CheckAndSubmitJobsFromDisc();
      currentlyFetching = false;
    }

    /// <summary>
    /// The connection to the previous server is back. Every engine that is not running and has no
    /// current message gets its finished job collected on an own thread.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ConnectionRestored(object sender, EventArgs e) {
      Logging.Instance.Info(this.ToString(), "Reconnected to old server - checking currently running appdomains");

      // the started threads may remove entries from engines, so enumerate a copy
      foreach (Executor engine in SnapshotEngines()) {
        if (!engine.Running && engine.CurrentMessage == MessageContainer.MessageType.NoMessage) {
          Logging.Instance.Info(this.ToString(), "Checking for JobId: " + engine.JobId);
          Thread finThread = new Thread(new ParameterizedThreadStart(GetFinishedJob));
          finThread.Start(engine.JobId);
        }
      }
    }

    #endregion

    /// <summary>
    /// Returns the dictionary of the execution engines, indexed by job id.
    /// This is the live instance used by the core, not a copy.
    /// </summary>
    public Dictionary<Guid, Executor> GetExecutionEngines() {
      return engines;
    }

    /// <summary>
    /// Logs exceptions which have not been handled inside of a job AppDomain.
    /// </summary>
    void appDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
      Logging.Instance.Error(this.ToString(), "Exception in AppDomain: " + e.ExceptionObject.ToString());
    }

    /// <summary>
    /// Returns the dictionary of the jobs, indexed by job id.
    /// This is the live instance used by the core, not a copy.
    /// </summary>
    internal Dictionary<Guid, Job> GetJobs() {
      return jobs;
    }

    /// <summary>
    /// Kill a appdomain with a specific id.
    /// </summary>
    /// <param name="id">the GUID of the job</param>
    private void KillAppDomain(Guid id) {
      if (!ReleaseJob(id))
        Logging.Instance.Error(this.ToString(), "KillAppDomain: AppDomain for job " + id + " doesn't exist");
      GC.Collect();  // kept from the original code: collect right after the AppDomain has been unloaded
    }

    /// <summary>
    /// Looks up the engine of a job while holding the lock.
    /// </summary>
    /// <param name="jobId">the GUID of the job</param>
    /// <param name="engine">the engine of the job, or null if there is none</param>
    /// <returns>true if an engine is registered for the job</returns>
    private bool TryGetEngine(Guid jobId, out Executor engine) {
      lock (engines) {
        return engines.TryGetValue(jobId, out engine);
      }
    }

    /// <summary>
    /// Creates a copy of the currently registered engines, so that they can be enumerated without holding the lock.
    /// </summary>
    private List<Executor> SnapshotEngines() {
      lock (engines) {
        return new List<Executor>(engines.Values);
      }
    }

    /// <summary>
    /// Removes the job from all three dictionaries and unloads its AppDomain.
    /// </summary>
    /// <param name="id">the GUID of the job</param>
    /// <returns>true if an AppDomain was registered for the job</returns>
    private bool ReleaseJob(Guid id) {
      lock (engines) {
        AppDomain domain;
        bool known = appDomains.TryGetValue(id, out domain);
        // remove the entries in any case, so that a failing unload does not leave stale entries behind
        appDomains.Remove(id);
        engines.Remove(id);
        jobs.Remove(id);
        if (known)
          UnloadAppDomain(domain);
        return known;
      }
    }

    /// <summary>
    /// Detaches the exception handler and unloads the AppDomain. Failures are logged, not thrown.
    /// </summary>
    /// <param name="domain">the AppDomain to unload</param>
    private void UnloadAppDomain(AppDomain domain) {
      try {
        domain.UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
        AppDomain.Unload(domain);
      }
      catch (Exception ex) {
        Logging.Instance.Error(this.ToString(), "Exception when unloading the appdomain: ", ex);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.