Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Hive.Client.Core/3.2/Core.cs @ 1813

Last change on this file since 1813 was 1812, checked in by kgrading, 15 years ago

changed job restart behaviour - the job will now be restarted when the serialization is done, not before (#571)

File size: 13.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Hive.Client.ExecutionEngine;
27using HeuristicLab.Hive.Client.Common;
28using System.Threading;
29using System.Reflection;
30using System.Diagnostics;
31using System.Security.Permissions;
32using System.Security.Policy;
33using System.Security;
34using HeuristicLab.Hive.Client.Communication;
35using HeuristicLab.Hive.Contracts.BusinessObjects;
36using HeuristicLab.Hive.Contracts;
37using System.Runtime.Remoting.Messaging;
38using HeuristicLab.PluginInfrastructure;
39using System.ServiceModel;
40using System.ServiceModel.Description;
41using HeuristicLab.Hive.Client.Core.ClientConsoleService;
42using HeuristicLab.Hive.Client.Core.ConfigurationManager;
43using HeuristicLab.Hive.Client.Communication.ServerService;
44using HeuristicLab.Hive.JobBase;
45using HeuristicLab.Hive.Client.Core.JobStorage;
46
47namespace HeuristicLab.Hive.Client.Core {
48  /// <summary>
49  /// The core component of the Hive Client
50  /// </summary>
51  public class Core: MarshalByRefObject {       
/// <summary>
/// Loop condition of the main processing loop in Start(): reset to false when Start() begins
/// and set to true when a Shutdown message is handled.
/// </summary>
52    public static bool abortRequested { get; set; }
// True while a SendJobAsync request is outstanding; reset in wcfService_SendJobCompleted.
53    private bool currentlyFetching = false;
54
// Per-job bookkeeping, all keyed by the job id. The engines dictionary instance is also
// the object the class locks on whenever these three dictionaries are modified.
55    private Dictionary<Guid, Executor> engines = new Dictionary<Guid, Executor>();
56    private Dictionary<Guid, AppDomain> appDomains = new Dictionary<Guid, AppDomain>();
57    private Dictionary<Guid, Job> jobs = new Dictionary<Guid, Job>();
58
// Assigned from WcfService.Instance in Start(); used for all server communication.
59    private WcfService wcfService;
// Heartbeat created and started in Start(), stopped when a Shutdown message is handled.
60    private Heartbeat beat;
61   
/// <summary>
/// Entry point of the client. Initializes the plugin manager and the client console
/// service, subscribes to the WCF service events, connects to the configured server,
/// starts the heartbeat and then dispatches queued messages until an abort is requested.
/// </summary>
public void Start() {
  abortRequested = false;
  PluginManager.Manager.Initialize();
  Logging.Instance.Info(this.ToString(), "Hive Client started");

  var consoleServer = new ClientConsoleServer();
  consoleServer.StartClientConsoleServer(new Uri("net.tcp://127.0.0.1:8000/ClientConsole/"));

  ConfigManager.Instance.Core = this;

  // Subscribe to every notification raised by the WCF layer
  wcfService = WcfService.Instance;
  wcfService.LoginCompleted += wcfService_LoginCompleted;
  wcfService.SendJobCompleted += wcfService_SendJobCompleted;
  wcfService.StoreFinishedJobResultCompleted += wcfService_StoreFinishedJobResultCompleted;
  wcfService.ProcessSnapshotCompleted += wcfService_ProcessSnapshotCompleted;
  wcfService.ConnectionRestored += wcfService_ConnectionRestored;
  wcfService.ServerChanged += wcfService_ServerChanged;
  wcfService.Connected += wcfService_Connected;

  // Restore the server address from the settings; connect only when both parts are set
  ConnectionContainer serverAddress = ConfigManager.Instance.GetServerIPAndPort();
  bool addressMissing = serverAddress.IPAdress == String.Empty || serverAddress.Port == 0;
  if (!addressMissing) {
    wcfService.Connect(serverAddress.IPAdress, serverAddress.Port);
  }

  // Start the heartbeat
  beat = new Heartbeat();
  beat.Interval = 10000;
  beat.StartHeartbeat();

  MessageQueue messageQueue = MessageQueue.GetInstance();

  // Main processing loop
  //Todo: own thread for message handling
  while (!abortRequested) {
    MessageContainer next = messageQueue.GetMessage();
    string messageName = next.Message.ToString();
    Debug.WriteLine("Main loop received this message: " + messageName);
    Logging.Instance.Info(this.ToString(), messageName);
    DetermineAction(next);
  }
  Console.WriteLine("ended!");
}
107
/// <summary>
/// Dispatches a single message taken from the MessageQueue to the matching action.
/// Messages that refer to a job which is no longer registered are logged and ignored
/// instead of failing the main loop with a KeyNotFoundException.
/// </summary>
/// <param name="container">The container holding the message type and the id of the affected job</param>
private void DetermineAction(MessageContainer container) {
  switch (container.Message) {
    //Server requests to abort a job
    case MessageContainer.MessageType.AbortJob: {
        Executor engine = FindEngine(container.JobId);
        if (engine != null) {
          engine.Abort();
        }
        break;
      }
    //Job has been successfully aborted
    case MessageContainer.MessageType.JobAborted:
      //todo: thread this
      Debug.WriteLine("Job aborted, he's dead");
      lock (engines) {
        AppDomain domain;
        if (appDomains.TryGetValue(container.JobId, out domain)) {
          AppDomain.Unload(domain);
        }
        // Remove() is a no-op for unknown keys, so this is safe even if the job is already gone
        appDomains.Remove(container.JobId);
        engines.Remove(container.JobId);
        jobs.Remove(container.JobId);
      }
      break;
    //Request a Snapshot from the Execution Engine
    case MessageContainer.MessageType.RequestSnapshot: {
        Executor engine = FindEngine(container.JobId);
        if (engine != null) {
          engine.RequestSnapshot();
        }
        break;
      }
    //Snapshot is ready and can be sent back to the Server
    case MessageContainer.MessageType.SnapshotReady:
      ThreadPool.QueueUserWorkItem(new WaitCallback(GetSnapshot), container.JobId);
      break;
    //Pull a Job from the Server
    case MessageContainer.MessageType.FetchJob:
      if (!currentlyFetching) {
        // Raise the flag before issuing the request: wcfService_SendJobCompleted resets it,
        // and if that handler ran before the assignment the flag would stay true forever.
        currentlyFetching = true;
        try {
          wcfService.SendJobAsync(ConfigManager.Instance.GetClientInfo().Id);
        }
        catch {
          // The request was never issued, so no completion handler will reset the flag
          currentlyFetching = false;
          throw;
        }
      }
      break;
    //A Job has finished and can be sent back to the server
    case MessageContainer.MessageType.FinishedJob:
      ThreadPool.QueueUserWorkItem(new WaitCallback(GetFinishedJob), container.JobId);
      break;
    //Hard shutdown of the client
    case MessageContainer.MessageType.Shutdown:
      lock (engines) {
        foreach (KeyValuePair<Guid, AppDomain> kvp in appDomains) {
          AppDomain.Unload(kvp.Value);
        }
      }
      abortRequested = true;
      beat.StopHeartBeat();
      WcfService.Instance.Logout(ConfigManager.Instance.GetClientInfo().Id);
      break;
  }
}

/// <summary>
/// Looks up the executor registered for a job while holding the bookkeeping lock.
/// </summary>
/// <param name="jobId">Id of the job whose executor is wanted</param>
/// <returns>The executor, or null (after logging) when no executor is registered for the id</returns>
private Executor FindEngine(Guid jobId) {
  Executor engine;
  lock (engines) {
    engines.TryGetValue(jobId, out engine);
  }
  if (engine == null) {
    Logging.Instance.Info(this.ToString(), "Ignoring message for unknown job " + jobId);
  }
  return engine;
}
160
161    //Asynchronous Threads for interaction with the Execution Engine
162    #region Async Threads for the EE
163   
/// <summary>
/// Worker routine that collects the result of a finished job from its executor. When the
/// client is logged in the result is handed to the server asynchronously; otherwise it is
/// persisted to disc and the job's AppDomain and bookkeeping entries are released.
/// </summary>
/// <param name="jobId">The job id as a boxed Guid</param>
private void GetFinishedJob(object jobId) {
  Guid jId = (Guid)jobId;

  // The job may have been removed (e.g. by a server change) before this worker ran.
  // Look it up under the lock and bail out instead of throwing KeyNotFoundException.
  Executor engine;
  lock (engines) {
    engines.TryGetValue(jId, out engine);
  }
  if (engine == null) {
    Logging.Instance.Info(this.ToString(), "Job " + jId + " is no longer registered, no result collected");
    return;
  }

  try {
    byte[] sJob = engine.GetFinishedJob();

    if (WcfService.Instance.ConnState == NetworkEnum.WcfConnState.Loggedin) {
      wcfService.StoreFinishedJobResultAsync(ConfigManager.Instance.GetClientInfo().Id,
        jId,
        sJob,
        1,
        null,
        true);
    } else {
      JobStorageManager.PersistObjectToDisc(wcfService.ServerIP, wcfService.ServerPort, jId, sJob);
      lock (engines) {
        AppDomain domain;
        if (appDomains.TryGetValue(jId, out domain)) {
          AppDomain.Unload(domain);
        }
        appDomains.Remove(jId);
        engines.Remove(jId);
        jobs.Remove(jId);
      }
    }
  }
  catch (InvalidStateException ise) {
    Logging.Instance.Error(this.ToString(), "Exception: ", ise);
  }
}
190
/// <summary>
/// Worker routine that fetches a snapshot from the job's executor, transmits it together
/// with the current progress to the server and then restarts the job.
/// </summary>
/// <param name="jobId">The job id as a boxed Guid</param>
private void GetSnapshot(object jobId) {
  Guid jId = (Guid)jobId;

  // Resolve the executor once, under the lock; the job may already have been removed
  // by the time this worker runs, in which case there is nothing to snapshot.
  Executor engine;
  lock (engines) {
    engines.TryGetValue(jId, out engine);
  }
  if (engine == null) {
    Logging.Instance.Info(this.ToString(), "Job " + jId + " is no longer registered, no snapshot taken");
    return;
  }

  byte[] obj = engine.GetSnapshot();
  wcfService.ProcessSnapshotSync(ConfigManager.Instance.GetClientInfo().Id,
    jId,
    obj,
    engine.Progress,
    null);
  // Restart only after the snapshot has been serialized and sent
  engine.StartOnlyJob();
}
201
202    #endregion
203
204    //Eventhandlers for the communication with the wcf Layer
205    #region wcfService Events
206
/// <summary>
/// Logs the outcome of a login attempt: the server's status message on failure,
/// a timestamped confirmation on success.
/// </summary>
void wcfService_LoginCompleted(object sender, LoginCompletedEventArgs e) {
  if (!e.Result.Success) {
    Logging.Instance.Error(this.ToString(), e.Result.StatusMessage);
    return;
  }
  Logging.Instance.Info(this.ToString(), "Login completed to Hive Server @ " + DateTime.Now);
}
213
/// <summary>
/// Handles the server's answer to a job request. Unless the server reports that no jobs
/// are left, a new AppDomain and executor are created for the received job and the job is
/// started. A job whose id is already registered is ignored.
/// </summary>
void wcfService_SendJobCompleted(object sender, SendJobCompletedEventArgs e) {
  try {
    if (e.Result.StatusMessage == ApplicationConstants.RESPONSE_COMMUNICATOR_NO_JOBS_LEFT) {
      return;
    }

    bool sandboxed = false;
    //Todo: make a set & override the equals method
    List<byte[]> files = new List<byte[]>();

    lock (engines) {
      if (jobs.ContainsKey(e.Result.Job.Id)) {
        // Already running this job: do not create an AppDomain that nobody would unload
        return;
      }

      AppDomain appDomain = PluginManager.Manager.CreateAndInitAppDomainWithSandbox(e.Result.Job.Id.ToString(), sandboxed, null, files);
      appDomain.UnhandledException += new UnhandledExceptionEventHandler(appDomain_UnhandledException);

      jobs.Add(e.Result.Job.Id, e.Result.Job);
      appDomains.Add(e.Result.Job.Id, appDomain);

      Executor engine = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
      engine.JobId = e.Result.Job.Id;
      engine.Queue = MessageQueue.GetInstance();
      engine.Start(e.Result.Job.SerializedJob);
      engines.Add(e.Result.Job.Id, engine);

      ClientStatusInfo.JobsFetched++;

      Debug.WriteLine("Increment FetchedJobs to:" + ClientStatusInfo.JobsFetched);
    }
  }
  finally {
    // Always allow the next FetchJob message to issue a request, even when setting up
    // the job failed; otherwise the client would never fetch another job.
    currentlyFetching = false;
  }
}
245   
246
/// <summary>
/// Called when the server has answered the transmission of a finished job's result.
/// Unloads the job's AppDomain, drops its bookkeeping entries and updates the
/// processed-jobs counter or logs the failure reported by the server.
/// </summary>
void wcfService_StoreFinishedJobResultCompleted(object sender, StoreFinishedJobResultCompletedEventArgs e) {
  Guid jobId = e.Result.JobId;
  lock (engines) {
    try {
      AppDomain domain;
      if (appDomains.TryGetValue(jobId, out domain)) {
        AppDomain.Unload(domain);
      }
    }
    catch (Exception ex) {
      Logging.Instance.Error(this.ToString(), "Exception when unloading the appdomain: ", ex);
    }
    finally {
      // Drop the entries even when unloading failed, so the finished job is not
      // left behind in the dictionaries.
      appDomains.Remove(jobId);
      engines.Remove(jobId);
      jobs.Remove(jobId);
    }
  }

  if (e.Result.Success) {
    ClientStatusInfo.JobsProcessed++;
    Debug.WriteLine("ProcessedJobs to:" + ClientStatusInfo.JobsProcessed);
  } else {
    Logging.Instance.Error(this.ToString(), "Sending of job " + e.Result.JobId + " failed, job has been wasted. Message: " + e.Result.StatusMessage);
  }
}
271
/// <summary>
/// Writes an info log entry once a snapshot has been transmitted to the server.
/// </summary>
void wcfService_ProcessSnapshotCompleted(object sender, ProcessSnapshotCompletedEventArgs e) {
  string source = this.ToString();
  string notice = "Snapshot " + e.Result.JobId + " has been transmitted according to plan.";
  Logging.Instance.Info(source, notice);
}
275
//Todo: First stop all threads, then terminate
/// <summary>
/// Called when the client is pointed at a different server. Unloads the AppDomains of
/// all jobs and forgets every job, executor and AppDomain that is currently registered.
/// </summary>
void wcfService_ServerChanged(object sender, EventArgs e) {
  Logging.Instance.Info(this.ToString(), "ServerChanged has been called");
  lock (engines) {
    foreach (KeyValuePair<Guid, AppDomain> entry in appDomains) {
      try {
        AppDomain.Unload(entry.Value);
      }
      catch (Exception ex) {
        // Keep going: one domain that cannot be unloaded must not keep the others alive
        Logging.Instance.Error(this.ToString(), "Exception when unloading the appdomain: ", ex);
      }
    }
    // Clear in place instead of assigning new dictionaries: the engines instance is the
    // lock object used throughout the class, so it has to stay the same object.
    appDomains.Clear();
    engines.Clear();
    // Forget the jobs as well; a stale id would make wcfService_SendJobCompleted
    // ignore a job with the same id later on.
    jobs.Clear();
  }
}
286
/// <summary>
/// Runs once a connection is established: logs the client in synchronously and then
/// lets the job storage submit any job results that were persisted to disc.
/// </summary>
void wcfService_Connected(object sender, EventArgs e) {
  var clientInfo = ConfigManager.Instance.GetClientInfo();
  wcfService.LoginSync(clientInfo);

  JobStorageManager.CheckAndSubmitJobsFromDisc();
}
291
/// <summary>
/// Called when the connection to the previous server is back. Every executor that is
/// not running and has no current message gets its finished job collected on its own thread.
/// </summary>
void wcfService_ConnectionRestored(object sender, EventArgs e) {
  Logging.Instance.Info(this.ToString(), "Reconnected to old server - checking currently running appdomains");

  // Collect the candidates first, under the lock. GetFinishedJob may remove entries from
  // engines, which must not happen while the dictionary is still being enumerated here.
  List<Guid> finishedJobIds = new List<Guid>();
  lock (engines) {
    foreach (KeyValuePair<Guid, Executor> execKVP in engines) {
      if (!execKVP.Value.Running && execKVP.Value.CurrentMessage == MessageContainer.MessageType.NoMessage) {
        Logging.Instance.Info(this.ToString(), "Checking for JobId: " + execKVP.Value.JobId);
        finishedJobIds.Add(execKVP.Value.JobId);
      }
    }
  }

  // Start the workers only after the lock has been released
  foreach (Guid finishedJobId in finishedJobIds) {
    Thread finThread = new Thread(new ParameterizedThreadStart(GetFinishedJob));
    finThread.Start(finishedJobId);
  }
}
304
305    #endregion
306
/// <summary>
/// Gives access to the executors of the registered jobs, keyed by job id.
/// </summary>
/// <returns>The internal dictionary instance itself, not a copy</returns>
public Dictionary<Guid, Executor> GetExecutionEngines() {
  Dictionary<Guid, Executor> registeredEngines = this.engines;
  return registeredEngines;
}
310
/// <summary>
/// Logs exceptions that were not handled inside a job's AppDomain.
/// </summary>
void appDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
  string source = this.ToString();
  string details = e.ExceptionObject.ToString();
  Logging.Instance.Error(source, "Exception in AppDomain: " + details);
}
315
/// <summary>
/// Gives access to the jobs currently registered with this client, keyed by job id.
/// </summary>
/// <returns>The internal dictionary instance itself, not a copy</returns>
internal Dictionary<Guid, Job> GetJobs() {
  Dictionary<Guid, Job> registeredJobs = this.jobs;
  return registeredJobs;
}
319  }
320}
Note: See TracBrowser for help on using the repository browser.