Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Hive.Client.Core/3.2/Core.cs @ 2115

Last change on this file since 2115 was 2092, checked in by svonolfe, 15 years ago

Small refactoring (#372)

File size: 17.0 KB
RevLine 
[735]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[714]23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
[768]26using HeuristicLab.Hive.Client.ExecutionEngine;
[735]27using HeuristicLab.Hive.Client.Common;
[768]28using System.Threading;
[770]29using System.Reflection;
30using System.Diagnostics;
31using System.Security.Permissions;
32using System.Security.Policy;
33using System.Security;
[790]34using HeuristicLab.Hive.Client.Communication;
[793]35using HeuristicLab.Hive.Contracts.BusinessObjects;
36using HeuristicLab.Hive.Contracts;
[804]37using System.Runtime.Remoting.Messaging;
[816]38using HeuristicLab.PluginInfrastructure;
[843]39using System.ServiceModel;
40using System.ServiceModel.Description;
[919]41using HeuristicLab.Hive.Client.Core.ClientConsoleService;
[932]42using HeuristicLab.Hive.Client.Core.ConfigurationManager;
[993]43using HeuristicLab.Hive.Client.Communication.ServerService;
[1001]44using HeuristicLab.Hive.JobBase;
[1364]45using HeuristicLab.Hive.Client.Core.JobStorage;
[714]46
47namespace HeuristicLab.Hive.Client.Core {
[1132]48  /// <summary>
49  /// The core component of the Hive Client
50  /// </summary>
[1379]51  public class Core: MarshalByRefObject {       
[1368]52    public static bool abortRequested { get; set; }
[1719]53    private bool currentlyFetching = false;
[1005]54
[1449]55    private Dictionary<Guid, Executor> engines = new Dictionary<Guid, Executor>();
56    private Dictionary<Guid, AppDomain> appDomains = new Dictionary<Guid, AppDomain>();
57    private Dictionary<Guid, Job> jobs = new Dictionary<Guid, Job>();
58
[923]59    private WcfService wcfService;
[1097]60    private Heartbeat beat;
[1132]61   
62    /// <summary>
63    /// Main Method for the client
64    /// </summary>
[1379]65    public void Start() {     
[1368]66      abortRequested = false;
[1755]67      PluginManager.Manager.Initialize();
[1371]68      Logging.Instance.Info(this.ToString(), "Hive Client started");
[901]69      ClientConsoleServer server = new ClientConsoleServer();
70      server.StartClientConsoleServer(new Uri("net.tcp://127.0.0.1:8000/ClientConsole/"));
[843]71
[932]72      ConfigManager manager = ConfigManager.Instance;
[908]73      manager.Core = this;
[1959]74
75
[1132]76     
77      //Register all Wcf Service references
[923]78      wcfService = WcfService.Instance;
[1036]79      wcfService.LoginCompleted += new EventHandler<LoginCompletedEventArgs>(wcfService_LoginCompleted);
[1379]80      wcfService.SendJobCompleted += new EventHandler<SendJobCompletedEventArgs>(wcfService_SendJobCompleted);
81      wcfService.StoreFinishedJobResultCompleted += new EventHandler<StoreFinishedJobResultCompletedEventArgs>(wcfService_StoreFinishedJobResultCompleted);
82      wcfService.ProcessSnapshotCompleted += new EventHandler<ProcessSnapshotCompletedEventArgs>(wcfService_ProcessSnapshotCompleted);
[1036]83      wcfService.ConnectionRestored += new EventHandler(wcfService_ConnectionRestored);
84      wcfService.ServerChanged += new EventHandler(wcfService_ServerChanged);
[1081]85      wcfService.Connected += new EventHandler(wcfService_Connected);
[1132]86      //Recover Server IP and Port from the Settings Framework
87      ConnectionContainer cc = ConfigManager.Instance.GetServerIPAndPort();     
[2025]88      if (cc.IPAdress != String.Empty && cc.Port != 0)
89        wcfService.SetIPAndPort(cc.IPAdress, cc.Port);
90
91      if (UptimeManager.Instance.isOnline())
92        wcfService.Connect();
93         
[1132]94      //Initialize the heartbeat
[1097]95      beat = new Heartbeat { Interval = 10000 };
[841]96      beat.StartHeartbeat();     
97
[735]98      MessageQueue queue = MessageQueue.GetInstance();
[1132]99     
[1340]100      //Main processing loop     
101      //Todo: own thread for message handling
[1368]102      //Rly?!
103      while (!abortRequested) {
[735]104        MessageContainer container = queue.GetMessage();
[779]105        Debug.WriteLine("Main loop received this message: " + container.Message.ToString());
[1371]106        Logging.Instance.Info(this.ToString(), container.Message.ToString());
[768]107        DetermineAction(container);
[735]108      }
[1481]109      Console.WriteLine("ended!");
[1379]110    }   
[768]111
[1132]112    /// <summary>
113    /// Reads and analyzes the Messages from the MessageQueue and starts corresponding actions
114    /// </summary>
115    /// <param name="container">The Container, containing the message</param>
[1368]116    private void DetermineAction(MessageContainer container) {           
[779]117      switch (container.Message) {
[1132]118        //Server requests to abort a job
[779]119        case MessageContainer.MessageType.AbortJob:
[1830]120          if(engines.ContainsKey(container.JobId))
121            engines[container.JobId].Abort();
122          else
123            Logging.Instance.Error(this.ToString(), "AbortJob: Engine doesn't exist");
[779]124          break;
[1132]125        //Job has been successfully aborted
[2025]126
127
128        case MessageContainer.MessageType.JobAborted:         
129        //todo: thread this
[1775]130          Debug.WriteLine("Job aborted, he's dead");
[1830]131          lock (engines) {           
132            Guid jobId = new Guid(container.JobId.ToString());
133            if(engines.ContainsKey(jobId)) {
134              appDomains[jobId].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
135              AppDomain.Unload(appDomains[jobId]);
136              appDomains.Remove(jobId);
137              engines.Remove(jobId);
138              jobs.Remove(jobId);
139              GC.Collect();
140            } else
141              Logging.Instance.Error(this.ToString(), "JobAbort: Engine doesn't exist");
[1775]142          }
[779]143          break;
[2025]144       
145       
[1132]146        //Request a Snapshot from the Execution Engine
[779]147        case MessageContainer.MessageType.RequestSnapshot:
[1830]148          if (engines.ContainsKey(container.JobId))
149            engines[container.JobId].RequestSnapshot();
150          else
151            Logging.Instance.Error(this.ToString(), "RequestSnapshot: Engine doesn't exist");
[779]152          break;
[2025]153       
154       
[1132]155        //Snapshot is ready and can be sent back to the Server
[779]156        case MessageContainer.MessageType.SnapshotReady:
[1379]157          ThreadPool.QueueUserWorkItem(new WaitCallback(GetSnapshot), container.JobId);         
[779]158          break;
[2025]159       
160       
[1132]161        //Pull a Job from the Server
[1719]162        case MessageContainer.MessageType.FetchJob:
163          if (!currentlyFetching) {
164            wcfService.SendJobAsync(ConfigManager.Instance.GetClientInfo().Id);
[1775]165            currentlyFetching = true;
[1719]166          }         
[811]167          break;         
[2025]168       
169       
[1132]170        //A Job has finished and can be sent back to the server
[779]171        case MessageContainer.MessageType.FinishedJob:
[1379]172          ThreadPool.QueueUserWorkItem(new WaitCallback(GetFinishedJob), container.JobId);         
[1085]173          break;     
[2025]174       
[2068]175
176        //When the timeslice is up
[2025]177        case MessageContainer.MessageType.UptimeLimitDisconnect:
178          Logging.Instance.Info(this.ToString(), "Uptime Limit reached, storing jobs and sending them back");
[2063]179
180          //check if there are running jobs
181          if (engines.Count > 0) {
182            //make sure there is no more fetching of jobs while the snapshots get processed
183            currentlyFetching = true;
184            //request a snapshot of each running job
185            foreach (KeyValuePair<Guid, Executor> kvp in engines) {
186              kvp.Value.RequestSnapshot();
187            }
188           
189          } else {
190            //Disconnect afterwards
191            WcfService.Instance.Disconnect();
192          }
[2025]193          break;
194       
195       
[1132]196        //Hard shutdown of the client
[1085]197        case MessageContainer.MessageType.Shutdown:
[1481]198          lock (engines) {
[1830]199            foreach (KeyValuePair<Guid, AppDomain> kvp in appDomains) {
200              appDomains[kvp.Key].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
[1481]201              AppDomain.Unload(kvp.Value);
[1830]202            }
[1481]203          }
[1368]204          abortRequested = true;
[1097]205          beat.StopHeartBeat();
[1635]206          WcfService.Instance.Logout(ConfigManager.Instance.GetClientInfo().Id);
[1085]207          break;
[779]208      }
209    }
[790]210
[1132]211    //Asynchronous Threads for interaction with the Execution Engine
[923]212    #region Async Threads for the EE
213   
[2068]214    /// <summary>
215    /// serializes the finished job and submits it to the server. If, at the time, a network connection is unavailable, the Job gets stored on the disk.
216    /// once the connection gets reestablished, the job gets submitted
217    /// </summary>
218    /// <param name="jobId"></param>
[811]219    private void GetFinishedJob(object jobId) {
[1449]220      Guid jId = (Guid)jobId;     
[1368]221      try {
[1830]222        if (!engines.ContainsKey(jId)) {
223          Logging.Instance.Error(this.ToString(), "GetFinishedJob: Engine doesn't exist");
224          return;
225        }
226       
[1368]227        byte[] sJob = engines[jId].GetFinishedJob();
[1005]228
[1368]229        if (WcfService.Instance.ConnState == NetworkEnum.WcfConnState.Loggedin) {
[1449]230          wcfService.StoreFinishedJobResultAsync(ConfigManager.Instance.GetClientInfo().Id,
[1368]231            jId,
232            sJob,
233            1,
234            null,
235            true);
[1830]236        } else {
[1368]237          JobStorageManager.PersistObjectToDisc(wcfService.ServerIP, wcfService.ServerPort, jId, sJob);
[1379]238          lock (engines) {
[1830]239            appDomains[jId].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
[1368]240            AppDomain.Unload(appDomains[jId]);
241            appDomains.Remove(jId);
242            engines.Remove(jId);
243            jobs.Remove(jId);
244          }
245        }
[1219]246      }
[1368]247      catch (InvalidStateException ise) {
[1371]248        Logging.Instance.Error(this.ToString(), "Exception: ", ise);
[1368]249      }
[804]250    }
251
[811]252    private void GetSnapshot(object jobId) {
[1449]253      Guid jId = (Guid)jobId;
[816]254      byte[] obj = engines[jId].GetSnapshot();
[1812]255      wcfService.ProcessSnapshotSync(ConfigManager.Instance.GetClientInfo().Id,
[1147]256        jId,
257        obj,
258        engines[jId].Progress,
[1812]259        null);
[2063]260
261      //Uptime Limit reached, now is a good time to destroy this jobs.
262      if (!UptimeManager.Instance.isOnline()) {
[2068]263        KillAppDomain(jId);       
[2063]264        //Still anything running?
265        if (engines.Count == 0)
266          WcfService.Instance.Disconnect();
267     
268      } else {
269        engines[jId].StartOnlyJob();
270      }
[811]271    }
272
[923]273    #endregion
274
[1132]275    //Eventhandlers for the communication with the wcf Layer
[923]276    #region wcfService Events
[2068]277    /// <summary>
278    /// Login has returned
279    /// </summary>
280    /// <param name="sender"></param>
281    /// <param name="e"></param>
[923]282    void wcfService_LoginCompleted(object sender, LoginCompletedEventArgs e) {
283      if (e.Result.Success) {
[2063]284        currentlyFetching = false;
[1371]285        Logging.Instance.Info(this.ToString(), "Login completed to Hive Server @ " + DateTime.Now);       
[923]286      } else
[1371]287        Logging.Instance.Error(this.ToString(), e.Result.StatusMessage);
[923]288    }   
289
[2068]290    /// <summary>
291    /// A new Job from the wcfService has been received and will be started within a AppDomain.
292    /// </summary>
293    /// <param name="sender"></param>
294    /// <param name="e"></param>
[1379]295    void wcfService_SendJobCompleted(object sender, SendJobCompletedEventArgs e) {
[1755]296      if (e.Result.StatusMessage != ApplicationConstants.RESPONSE_COMMUNICATOR_NO_JOBS_LEFT) {       
[1487]297        bool sandboxed = false;
[1602]298        List<byte[]> files = new List<byte[]>();
[2082]299        foreach (CachedHivePluginInfo plugininfo in PluginCache.Instance.GetPlugins(e.Result.Job.JobInfo.PluginsNeeded))
[1936]300          files.AddRange(plugininfo.PluginFiles);
[2082]301
302        AppDomain appDomain = PluginManager.Manager.CreateAndInitAppDomainWithSandbox(e.Result.Job.JobInfo.Id.ToString(), sandboxed, null, files);
[997]303        appDomain.UnhandledException += new UnhandledExceptionEventHandler(appDomain_UnhandledException);
[2082]304        lock (engines) {
305          if (!jobs.ContainsKey(e.Result.Job.JobInfo.Id)) {
306            jobs.Add(e.Result.Job.JobInfo.Id, e.Result.Job.JobInfo);
307            appDomains.Add(e.Result.Job.JobInfo.Id, appDomain);
[997]308
[1033]309            Executor engine = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
[2082]310            engine.JobId = e.Result.Job.JobInfo.Id;
[1936]311            engine.Queue = MessageQueue.GetInstance();           
[2092]312            engine.Start(e.Result.Job.SerializedJobData);
[2082]313            engines.Add(e.Result.Job.JobInfo.Id, engine);
[798]314
[1033]315            ClientStatusInfo.JobsFetched++;
[798]316
[1033]317            Debug.WriteLine("Increment FetchedJobs to:" + ClientStatusInfo.JobsFetched);
318          }
[1775]319        }       
[960]320      }
[1775]321      currentlyFetching = false;
[798]322    }
[1379]323
[2068]324    /// <summary>
325    /// A finished job has been stored on the server
326    /// </summary>
327    /// <param name="sender"></param>
328    /// <param name="e"></param>
[1379]329    void wcfService_StoreFinishedJobResultCompleted(object sender, StoreFinishedJobResultCompletedEventArgs e) {
[2068]330      KillAppDomain(e.Result.JobId);
331      if (e.Result.Success) {           
[1589]332        ClientStatusInfo.JobsProcessed++;
333        Debug.WriteLine("ProcessedJobs to:" + ClientStatusInfo.JobsProcessed);               
[1371]334      } else {       
[1589]335        Logging.Instance.Error(this.ToString(), "Sending of job " + e.Result.JobId + " failed, job has been wasted. Message: " + e.Result.StatusMessage);
[840]336      }
[779]337    }
[908]338
[2068]339    /// <summary>
340    /// A snapshot has been stored on the server
341    /// </summary>
342    /// <param name="sender"></param>
343    /// <param name="e"></param>
[1379]344    void wcfService_ProcessSnapshotCompleted(object sender, ProcessSnapshotCompletedEventArgs e) {
[1589]345      Logging.Instance.Info(this.ToString(), "Snapshot " + e.Result.JobId + " has been transmitted according to plan.");
[1379]346    }
347
[2068]348    /// <summary>
349    /// The server has been changed. All Appdomains and Jobs must be aborted!
350    /// </summary>
351    /// <param name="sender"></param>
352    /// <param name="e"></param>
[932]353    void wcfService_ServerChanged(object sender, EventArgs e) {
[1371]354      Logging.Instance.Info(this.ToString(), "ServerChanged has been called");
[1379]355      lock (engines) {
[2068]356        foreach (KeyValuePair<Guid, Executor> entries in engines) {
357          engines[entries.Key].Abort();
358          //appDomains[entries.Key].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
359          //AppDomain.Unload(appDomains[entries.Key]);
[1830]360        }
[2068]361        //appDomains = new Dictionary<Guid, AppDomain>();
362        //engines = new Dictionary<Guid, Executor>();
363        //jobs = new Dictionary<Guid, Job>();
[1081]364      }
365    }
366
[2068]367    /// <summary>
368    /// Connnection to the server has been estabilshed => Login and Send the persistet Jobs from the harddisk.
369    /// </summary>
370    /// <param name="sender"></param>
371    /// <param name="e"></param>
[1081]372    void wcfService_Connected(object sender, EventArgs e) {
[2068]373      wcfService.LoginSync(ConfigManager.Instance.GetClientInfo());     
[1364]374      JobStorageManager.CheckAndSubmitJobsFromDisc();
[2068]375      currentlyFetching = false;
[932]376    }
377
[1097]378    //this is a little bit tricky -
[1083]379    void wcfService_ConnectionRestored(object sender, EventArgs e) {
[1371]380      Logging.Instance.Info(this.ToString(), "Reconnected to old server - checking currently running appdomains");                 
[1097]381
[1449]382      foreach (KeyValuePair<Guid, Executor> execKVP in engines) {
[1097]383        if (!execKVP.Value.Running && execKVP.Value.CurrentMessage == MessageContainer.MessageType.NoMessage) {
[1371]384          Logging.Instance.Info(this.ToString(), "Checking for JobId: " + execKVP.Value.JobId);
[1097]385          Thread finThread = new Thread(new ParameterizedThreadStart(GetFinishedJob));
386          finThread.Start(execKVP.Value.JobId);
387        }
388      }
[1083]389    }
[932]390
[923]391    #endregion
392
[1449]393    public Dictionary<Guid, Executor> GetExecutionEngines() {
[908]394      return engines;
395    }
[997]396
397    void appDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
[1936]398      Logging.Instance.Error(this.ToString(), "Exception in AppDomain: " + e.ExceptionObject.ToString());     
[997]399    }
[1755]400
[1936]401    internal Dictionary<Guid, Job> GetJobs() {           
[1755]402      return jobs;
403    }
[2068]404
405    /// <summary>
406    /// Kill a appdomain with a specific id.
407    /// </summary>
408    /// <param name="id">the GUID of the job</param>
409    private void KillAppDomain(Guid id) {
410      lock (engines) {
411        try {
412          appDomains[id].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
413          AppDomain.Unload(appDomains[id]);
414          appDomains.Remove(id);
415          engines.Remove(id);
416          jobs.Remove(id);
417        }       
418        catch (Exception ex) {
419          Logging.Instance.Error(this.ToString(), "Exception when unloading the appdomain: ", ex);
420        }
421      }
422      GC.Collect();
423    }
[714]424  }
425}
Note: See TracBrowser for help on using the repository browser.