Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Hive.Client.Core/3.2/Core.cs @ 2948

Last change on this file since 2948 was 2591, checked in by gkronber, 15 years ago

Copied refactored plugin infrastructure from branch and merged changeset r2586:2589 from branch into the trunk. #799

File size: 18.0 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
[714]23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
[768]26using HeuristicLab.Hive.Client.ExecutionEngine;
[735]27using HeuristicLab.Hive.Client.Common;
[768]28using System.Threading;
[770]29using System.Reflection;
30using System.Diagnostics;
31using System.Security.Permissions;
32using System.Security.Policy;
33using System.Security;
[790]34using HeuristicLab.Hive.Client.Communication;
[793]35using HeuristicLab.Hive.Contracts.BusinessObjects;
36using HeuristicLab.Hive.Contracts;
[804]37using System.Runtime.Remoting.Messaging;
[816]38using HeuristicLab.PluginInfrastructure;
[843]39using System.ServiceModel;
40using System.ServiceModel.Description;
[919]41using HeuristicLab.Hive.Client.Core.ClientConsoleService;
[932]42using HeuristicLab.Hive.Client.Core.ConfigurationManager;
[993]43using HeuristicLab.Hive.Client.Communication.ServerService;
[1001]44using HeuristicLab.Hive.JobBase;
[1364]45using HeuristicLab.Hive.Client.Core.JobStorage;
[714]46
namespace HeuristicLab.Hive.Client.Core {
  /// <summary>
  /// The core component of the Hive Client.
  /// It wires up the WCF service events, runs the message loop and keeps track of
  /// one <see cref="Executor"/> / <see cref="AppDomain"/> / <see cref="Job"/> triple per job id.
  /// </summary>
  /// <remarks>
  /// The three dictionaries are mutated from the message loop, from thread-pool workers and
  /// from WCF callbacks. Every access inside this class is guarded by <c>lock (engines)</c>.
  /// The lock object is intentionally kept as the <c>engines</c> dictionary (instead of a
  /// private gate) because that instance is handed out by <see cref="GetExecutionEngines"/>
  /// and code outside this file may already synchronize on it.
  /// </remarks>
  public class Core : MarshalByRefObject {
    /// <summary>Set to <c>true</c> to make the message loop in <see cref="Start"/> terminate.</summary>
    public static bool abortRequested { get; set; }

    // Written by the message loop and by WCF completion callbacks, hence volatile.
    private volatile bool currentlyFetching = false;

    private Dictionary<Guid, Executor> engines = new Dictionary<Guid, Executor>();
    private Dictionary<Guid, AppDomain> appDomains = new Dictionary<Guid, AppDomain>();
    private Dictionary<Guid, Job> jobs = new Dictionary<Guid, Job>();

    private WcfService wcfService;
    private Heartbeat beat;

    /// <summary>
    /// Main method of the client: starts the client console server, registers the WCF
    /// event handlers, connects (if the uptime manager reports online), starts the heartbeat
    /// and then blocks in the message loop until <see cref="abortRequested"/> becomes true.
    /// </summary>
    public void Start() {
      abortRequested = false;
      Logging.Instance.Info(this.ToString(), "Hive Client started");
      ClientConsoleServer server = new ClientConsoleServer();
      server.StartClientConsoleServer(new Uri("net.tcp://127.0.0.1:8000/ClientConsole/"));

      ConfigManager manager = ConfigManager.Instance;
      manager.Core = this;

      //Register all Wcf Service references
      wcfService = WcfService.Instance;
      wcfService.LoginCompleted += new EventHandler<LoginCompletedEventArgs>(wcfService_LoginCompleted);
      wcfService.SendJobCompleted += new EventHandler<SendJobCompletedEventArgs>(wcfService_SendJobCompleted);
      wcfService.StoreFinishedJobResultCompleted += new EventHandler<StoreFinishedJobResultCompletedEventArgs>(wcfService_StoreFinishedJobResultCompleted);
      wcfService.ProcessSnapshotCompleted += new EventHandler<ProcessSnapshotCompletedEventArgs>(wcfService_ProcessSnapshotCompleted);
      wcfService.ConnectionRestored += new EventHandler(wcfService_ConnectionRestored);
      wcfService.ServerChanged += new EventHandler(wcfService_ServerChanged);
      wcfService.Connected += new EventHandler(wcfService_Connected);

      //Recover Server IP and Port from the Settings Framework
      ConnectionContainer cc = ConfigManager.Instance.GetServerIPAndPort();
      // IsNullOrEmpty: a null address must be treated like "not configured" as well.
      if (!String.IsNullOrEmpty(cc.IPAdress) && cc.Port != 0)
        wcfService.SetIPAndPort(cc.IPAdress, cc.Port);

      if (UptimeManager.Instance.isOnline())
        wcfService.Connect();

      //Initialize the heartbeat
      beat = new Heartbeat { Interval = 10000 };
      beat.StartHeartbeat();

      MessageQueue queue = MessageQueue.GetInstance();

      //Main processing loop
      //Todo: own thread for message handling
      while (!abortRequested) {
        MessageContainer container = queue.GetMessage();
        DetermineAction(container);
      }
      Console.WriteLine("ended!");
    }

    /// <summary>
    /// Reads and analyzes the Messages from the MessageQueue and starts corresponding actions
    /// </summary>
    /// <param name="container">The Container, containing the message</param>
    private void DetermineAction(MessageContainer container) {
      Logging.Instance.Info(this.ToString(), "Message: " + container.Message.ToString() + " for job: " + container.JobId);
      switch (container.Message) {
        //Server requests to abort a job
        case MessageContainer.MessageType.AbortJob: {
            Executor engine = FindEngine(container.JobId);
            if (engine != null)
              engine.Abort();
            else
              Logging.Instance.Error(this.ToString(), "AbortJob: Engine doesn't exist");
            break;
          }

        //Job has been successfully aborted
        case MessageContainer.MessageType.JobAborted:
          //todo: thread this
          if (UnloadJob(container.JobId))
            GC.Collect();
          else
            Logging.Instance.Error(this.ToString(), "JobAbort: Engine doesn't exist");
          break;

        //Request a Snapshot from the Execution Engine
        case MessageContainer.MessageType.RequestSnapshot: {
            Executor engine = FindEngine(container.JobId);
            if (engine != null)
              engine.RequestSnapshot();
            else
              Logging.Instance.Error(this.ToString(), "RequestSnapshot: Engine doesn't exist");
            break;
          }

        //Snapshot is ready and can be sent back to the Server
        case MessageContainer.MessageType.SnapshotReady:
          ThreadPool.QueueUserWorkItem(new WaitCallback(GetSnapshot), container.JobId);
          break;

        //Pull a Job from the Server
        case MessageContainer.MessageType.FetchJob:
          if (!currentlyFetching) {
            // Raise the flag BEFORE issuing the async call: the completion handler resets it,
            // and if it ran first the flag would be stuck at true and fetching would stop forever.
            currentlyFetching = true;
            try {
              wcfService.SendJobAsync(ConfigManager.Instance.GetClientInfo().Id);
            }
            catch {
              // The request never went out, so no completion handler will reset the flag.
              currentlyFetching = false;
              throw;
            }
          } else
            Logging.Instance.Info(this.ToString(), "Currently fetching, won't fetch this time!");
          break;

        //A Job has finished and can be sent back to the server
        case MessageContainer.MessageType.FinishedJob:
          ThreadPool.QueueUserWorkItem(new WaitCallback(GetFinishedJob), container.JobId);
          break;

        //When the timeslice is up
        case MessageContainer.MessageType.UptimeLimitDisconnect: {
            Logging.Instance.Info(this.ToString(), "Uptime Limit reached, storing jobs and sending them back");

            // Work on a copy: snapshot workers remove engines while we iterate.
            List<Executor> running = SnapshotEngines();
            if (running.Count > 0) {
              //make sure there is no more fetching of jobs while the snapshots get processed
              currentlyFetching = true;
              //request a snapshot of each running job
              foreach (Executor engine in running)
                engine.RequestSnapshot();
            } else {
              //Disconnect afterwards
              WcfService.Instance.Disconnect();
            }
            break;
          }

        //Hard shutdown of the client
        case MessageContainer.MessageType.Shutdown:
          lock (engines) {
            // UnloadDomain logs instead of throwing, so one stubborn domain
            // cannot prevent the rest of the shutdown sequence from running.
            foreach (AppDomain domain in appDomains.Values)
              UnloadDomain(domain);
            appDomains.Clear();
            engines.Clear();
            jobs.Clear();
          }
          abortRequested = true;
          beat.StopHeartBeat();
          WcfService.Instance.Logout(ConfigManager.Instance.GetClientInfo().Id);
          break;
      }
    }

    //Asynchronous Threads for interaction with the Execution Engine
    #region Async Threads for the EE

    /// <summary>
    /// Serializes the finished job and submits it to the server. If the client is not logged in
    /// at that time, the job is persisted to disk and its AppDomain is unloaded.
    /// </summary>
    /// <param name="jobId">The boxed <see cref="Guid"/> of the job.</param>
    private void GetFinishedJob(object jobId) {
      Guid jId = (Guid)jobId;
      Logging.Instance.Info(this.ToString(), "Getting the finished job with id: " + jId);
      try {
        Executor engine = FindEngine(jId);
        if (engine == null) {
          Logging.Instance.Error(this.ToString(), "GetFinishedJob: Engine doesn't exist");
          return;
        }

        byte[] sJob = engine.GetFinishedJob();

        if (WcfService.Instance.ConnState == NetworkEnum.WcfConnState.Loggedin) {
          Logging.Instance.Info(this.ToString(), "Sending the finished job with id: " + jId);
          // The AppDomain is torn down in wcfService_StoreFinishedJobResultCompleted.
          wcfService.StoreFinishedJobResultAsync(ConfigManager.Instance.GetClientInfo().Id,
            jId,
            sJob,
            1,
            null,
            true);
        } else {
          Logging.Instance.Info(this.ToString(), "Storing the finished job with id: " + jId + " to hdd");
          JobStorageManager.PersistObjectToDisc(wcfService.ServerIP, wcfService.ServerPort, jId, sJob);
          UnloadJob(jId);
        }
      }
      catch (InvalidStateException ise) {
        Logging.Instance.Error(this.ToString(), "Exception: ", ise);
      }
    }

    /// <summary>
    /// Fetches a snapshot from the job's engine and sends it synchronously to the server.
    /// Afterwards the job is either destroyed (uptime limit reached) or restarted.
    /// </summary>
    /// <param name="jobId">The boxed <see cref="Guid"/> of the job.</param>
    private void GetSnapshot(object jobId) {
      Logging.Instance.Info(this.ToString(), "Fetching a snapshot for job " + jobId);
      Guid jId = (Guid)jobId;

      // The engine may have been removed between queuing this work item and running it.
      Executor engine = FindEngine(jId);
      if (engine == null) {
        Logging.Instance.Error(this.ToString(), "GetSnapshot: Engine doesn't exist");
        return;
      }

      byte[] obj = engine.GetSnapshot();
      Logging.Instance.Info(this.ToString(), "BEGIN: Sending snapshot sync");
      wcfService.ProcessSnapshotSync(ConfigManager.Instance.GetClientInfo().Id,
        jId,
        obj,
        engine.Progress,
        null);
      Logging.Instance.Info(this.ToString(), "END: Sended snapshot sync");

      //Uptime Limit reached, now is a good time to destroy this jobs.
      if (!UptimeManager.Instance.isOnline()) {
        KillAppDomain(jId);
        //Still anything running?
        bool nothingRunning;
        lock (engines) {
          nothingRunning = engines.Count == 0;
        }
        if (nothingRunning)
          WcfService.Instance.Disconnect();
      } else {
        Logging.Instance.Info(this.ToString(), "Restarting the job" + jobId);
        engine.StartOnlyJob();
      }
    }

    #endregion

    //Eventhandlers for the communication with the wcf Layer
    #region wcfService Events
    /// <summary>
    /// Login has returned
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_LoginCompleted(object sender, LoginCompletedEventArgs e) {
      if (e.Result.Success) {
        currentlyFetching = false;
        Logging.Instance.Info(this.ToString(), "Login completed to Hive Server @ " + DateTime.Now);
      } else
        Logging.Instance.Error(this.ToString(), e.Result.StatusMessage);
    }

    /// <summary>
    /// A new Job from the wcfService has been received and will be started within a AppDomain.
    /// </summary>
    /// <remarks>
    /// <c>currentlyFetching</c> is reset in a <c>finally</c> block so that a failure while
    /// setting up the job cannot block all future fetches.
    /// </remarks>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_SendJobCompleted(object sender, SendJobCompletedEventArgs e) {
      try {
        // Check the status first; only touch e.Result.Job once we know a job was delivered.
        // NOTE(review): presumably Job is null in the "no jobs left" response - confirm with the server contract.
        if (e.Result.StatusMessage == ApplicationConstants.RESPONSE_COMMUNICATOR_NO_JOBS_LEFT) {
          Logging.Instance.Info(this.ToString(), "No more jobs left!");
          return;
        }

        Job job = e.Result.Job;
        Guid jobId = job.Id;
        Logging.Instance.Info(this.ToString(), "Received new job with id " + jobId);

        List<byte[]> files = new List<byte[]>();
        Logging.Instance.Info(this.ToString(), "Fetching plugins for job " + jobId);
        foreach (CachedHivePluginInfo plugininfo in PluginCache.Instance.GetPlugins(job.PluginsNeeded))
          files.AddRange(plugininfo.PluginFiles);
        Logging.Instance.Info(this.ToString(), "Plugins fetched for job " + jobId);

        AppDomain appDomain = HeuristicLab.PluginInfrastructure.Sandboxing.SandboxManager.CreateAndInitSandbox(jobId.ToString(), files);
        appDomain.UnhandledException += new UnhandledExceptionEventHandler(appDomain_UnhandledException);

        lock (engines) {
          if (jobs.ContainsKey(jobId)) {
            // Job is already registered: drop the sandbox we just created instead of leaking it.
            UnloadDomain(appDomain);
            return;
          }

          jobs.Add(jobId, job);
          appDomains.Add(jobId, appDomain);
          try {
            Logging.Instance.Info(this.ToString(), "Creating AppDomain");
            Executor engine = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName);
            Logging.Instance.Info(this.ToString(), "Created AppDomain");
            engine.JobId = jobId;
            engine.Queue = MessageQueue.GetInstance();
            Logging.Instance.Info(this.ToString(), "Starting Engine for job " + jobId);
            engine.Start(e.Data);
            engines.Add(jobId, engine);
          }
          catch {
            // Roll back the partial registration so the job id is not blocked forever
            // and the sandbox does not stay loaded. (Monitor locks are re-entrant.)
            UnloadJob(jobId);
            throw;
          }

          ClientStatusInfo.JobsFetched++;

          Debug.WriteLine("Increment FetchedJobs to:" + ClientStatusInfo.JobsFetched);
        }
      }
      finally {
        currentlyFetching = false;
      }
    }

    /// <summary>
    /// A finished job has been stored on the server
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_StoreFinishedJobResultCompleted(object sender, StoreFinishedJobResultCompletedEventArgs e) {
      Logging.Instance.Info(this.ToString(), "Job submitted with id " + e.Result.JobId);
      // The AppDomain is destroyed regardless of the outcome; a failed store is only logged.
      KillAppDomain(e.Result.JobId);
      if (e.Result.Success) {
        ClientStatusInfo.JobsProcessed++;
        Debug.WriteLine("ProcessedJobs to:" + ClientStatusInfo.JobsProcessed);
      } else {
        Logging.Instance.Error(this.ToString(), "Sending of job " + e.Result.JobId + " failed, job has been wasted. Message: " + e.Result.StatusMessage);
      }
    }

    /// <summary>
    /// A snapshot has been stored on the server
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ProcessSnapshotCompleted(object sender, ProcessSnapshotCompletedEventArgs e) {
      Logging.Instance.Info(this.ToString(), "Snapshot " + e.Result.JobId + " has been transmitted according to plan.");
    }

    /// <summary>
    /// The server has been changed. All running engines are asked to abort.
    /// </summary>
    /// <remarks>
    /// Only <c>Abort()</c> is called here; the AppDomains are not unloaded in this handler.
    /// NOTE(review): presumably each engine answers with a JobAborted message, whose handler
    /// in DetermineAction performs the unload - confirm against Executor.
    /// </remarks>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ServerChanged(object sender, EventArgs e) {
      Logging.Instance.Info(this.ToString(), "ServerChanged has been called");
      foreach (Executor engine in SnapshotEngines())
        engine.Abort();
    }

    /// <summary>
    /// Connection to the server has been established => login and send the persisted jobs from the hard disk.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_Connected(object sender, EventArgs e) {
      wcfService.LoginSync(ConfigManager.Instance.GetClientInfo());
      JobStorageManager.CheckAndSubmitJobsFromDisc();
      currentlyFetching = false;
    }

    /// <summary>
    /// The connection to the previous server is back. Every engine that is not running and
    /// has no pending message gets its finished job collected on a dedicated thread.
    /// </summary>
    /// <param name="sender"></param>
    /// <param name="e"></param>
    void wcfService_ConnectionRestored(object sender, EventArgs e) {
      Logging.Instance.Info(this.ToString(), "Reconnected to old server - checking currently running appdomains");

      // Iterate over a copy: the GetFinishedJob threads started below may remove engines.
      foreach (Executor engine in SnapshotEngines()) {
        if (!engine.Running && engine.CurrentMessage == MessageContainer.MessageType.NoMessage) {
          Logging.Instance.Info(this.ToString(), "Checking for JobId: " + engine.JobId);
          Thread finThread = new Thread(new ParameterizedThreadStart(GetFinishedJob));
          finThread.Start(engine.JobId);
        }
      }
    }

    #endregion

    /// <summary>
    /// Returns the live dictionary of execution engines (not a copy).
    /// This class guards it with <c>lock</c> on the returned instance.
    /// </summary>
    public Dictionary<Guid, Executor> GetExecutionEngines() {
      return engines;
    }

    /// <summary>Logs unhandled exceptions raised inside a job's AppDomain.</summary>
    void appDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
      Logging.Instance.Error(this.ToString(), "Exception in AppDomain: " + e.ExceptionObject.ToString());
    }

    /// <summary>
    /// Returns the live dictionary of jobs (not a copy).
    /// </summary>
    internal Dictionary<Guid, Job> GetJobs() {
      return jobs;
    }

    /// <summary>
    /// Kill a appdomain with a specific id.
    /// </summary>
    /// <param name="id">the GUID of the job</param>
    private void KillAppDomain(Guid id) {
      Logging.Instance.Info(this.ToString(), "Shutting down Appdomain for Job " + id);
      UnloadJob(id);
      GC.Collect();
    }

    /// <summary>Looks up the engine of a job under the lock; returns null when none is registered.</summary>
    private Executor FindEngine(Guid jobId) {
      lock (engines) {
        Executor engine;
        return engines.TryGetValue(jobId, out engine) ? engine : null;
      }
    }

    /// <summary>Returns a copy of the currently registered engines that is safe to enumerate without the lock.</summary>
    private List<Executor> SnapshotEngines() {
      lock (engines) {
        return new List<Executor>(engines.Values);
      }
    }

    /// <summary>
    /// Removes every trace of a job (engine, AppDomain, job object) and unloads its AppDomain.
    /// Unload failures are logged, the bookkeeping entries are removed in any case.
    /// </summary>
    /// <param name="jobId">the GUID of the job</param>
    /// <returns>true if an engine was registered for the job, false otherwise.</returns>
    private bool UnloadJob(Guid jobId) {
      lock (engines) {
        bool hadEngine = engines.Remove(jobId);
        AppDomain domain;
        if (appDomains.TryGetValue(jobId, out domain)) {
          appDomains.Remove(jobId);
          UnloadDomain(domain);
        }
        jobs.Remove(jobId);
        return hadEngine;
      }
    }

    /// <summary>Detaches the unhandled-exception handler and unloads the domain; failures are logged, not thrown.</summary>
    private void UnloadDomain(AppDomain domain) {
      try {
        domain.UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
        AppDomain.Unload(domain);
      }
      catch (Exception ex) {
        Logging.Instance.Error(this.ToString(), "Exception when unloading the appdomain: ", ex);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.