Free cookie consent management tool by TermsFeed Policy Generator

Changeset 5450


Ignore:
Timestamp:
02/07/11 11:44:15 (13 years ago)
Author:
ascheibe
Message:

#1233

  • added Pause/Stop/Abort mechanisms to the slave
  • added Pause to Jobs
Location:
branches/HeuristicLab.Hive-3.4/sources
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave.Tests/Mocks/MockJob.cs

    r5314 r5450  
    204204    }
    205205
     206    public event EventHandler JobPaused;
     207    protected virtual void OnJobPaused() {
     208      EventHandler handler = JobPaused;
     209      if (handler != null) handler(this, EventArgs.Empty);
     210    }
    206211    #endregion
    207212  }
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave.Views/3.4/SlaveItem.cs

    r5320 r5450  
    9595      try {
    9696        if (pipeFactory.State != CommunicationState.Faulted && pipeFactory.State != CommunicationState.Closed)
    97           pipeProxy.SoftPause();
     97          pipeProxy.StopAll();
    9898      }
    9999      catch (Exception e) {
     
    105105      try {
    106106        if (pipeFactory.State != CommunicationState.Faulted && pipeFactory.State != CommunicationState.Closed)
    107           pipeProxy.HardPause();
     107          pipeProxy.PauseAll();
    108108      }
    109109      catch (Exception e) {
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Core.cs

    r5404 r5450  
    4040
    4141    //TODO: this class should be a singleton; there is only one instance, the reference is meanwhile saved in TheCore
    42     public static Core theCore;
     42    public static Core TheCore;
    4343
    4444    public static bool abortRequested { get; set; }
     
    6666
    6767    public Core() {
    68       theCore = this;
     68      TheCore = this;
    6969    }
    7070
     
    140140      } else if (container is MessageContainer) {
    141141        switch (container.Message) {
    142           //Server requests to abort a job
    143           case MessageContainer.MessageType.AbortJob:
    144             if (engines.ContainsKey(container.JobId))
    145               try {
    146                 engines[container.JobId].Abort();
    147               }
    148               catch (AppDomainUnloadedException) {
    149                 // appdomain already unloaded. Finishing job probably ongoing
    150               } else
    151               ClientCom.LogMessage("AbortJob: Engine doesn't exist");
    152             break;
    153 
    154142          //Pull a Job from the Server
    155143          case MessageContainer.MessageType.CalculateJob:
     
    160148            break;
    161149
    162           //Hard shutdown of the client
    163150          case MessageContainer.MessageType.ShutdownSlave:
    164151            ShutdownCore();
    165152            break;
    166           case MessageContainer.MessageType.HardPause:
    167             doHardPause();
    168             break;
    169           case MessageContainer.MessageType.SoftPause:
    170             doSoftPause();
     153          case MessageContainer.MessageType.StopAll:
     154            DoStopAll();
     155            break;
     156          case MessageContainer.MessageType.PauseAll:
     157            DoPauseAll();
     158            break;
     159          case MessageContainer.MessageType.AbortAll:
     160            DoAbortAll();
     161            break;
     162          case MessageContainer.MessageType.AbortJob:
     163            KillAppDomain(container.JobId);
     164            break;
     165          case MessageContainer.MessageType.StopJob:
     166            DoStopJob(container.JobId);
     167            break;
     168          case MessageContainer.MessageType.PauseJob:
     169            DoPauseJob(container.JobId);
    171170            break;
    172171          case MessageContainer.MessageType.Restart:
    173             doRestart();
     172            DoStartSlave();
    174173            break;
    175174        }
     
    179178    }
    180179
    181     /// <summary>
    182     /// reinitializes everything and continues operation,
    183     /// can be called after SoftPause() or HardPause()
    184     /// </summary>
    185     public void Restart() {
    186       MessageContainer mc = new MessageContainer(MessageContainer.MessageType.Restart);
    187       MessageQueue.GetInstance().AddMessage(mc);
    188     }
    189 
    190     private void doRestart() {
    191       ClientCom.LogMessage("Restart received");
    192       StartHeartbeats();
    193       ClientCom.LogMessage("Restart done");
    194     }
    195 
    196     /// <summary>
    197     /// wait for jobs to finish, then pause client
    198     /// </summary>
    199     public void SoftPause() {
    200       MessageContainer mc = new MessageContainer(MessageContainer.MessageType.SoftPause);
    201       MessageQueue.GetInstance().AddMessage(mc);
    202     }
    203 
    204     private void doSoftPause() {
    205       ClientCom.LogMessage("Soft pause received");
    206 
    207       //TODO: jobs get removed from Jobs map, is this a problem?
    208       foreach (Job job in Jobs.Values) {
     180    private void DoPauseJob(Guid guid) {
     181      Job job = Jobs[guid];
     182
     183      if (job != null) {
    209184        engines[job.Id].Pause();
    210185        JobData sJob = engines[job.Id].GetFinishedJob();
     
    224199        }
    225200      }
    226 
     201    }
     202
     203    private void DoStopJob(Guid guid) {
     204      Job job = Jobs[guid];
     205
     206      if (job != null) {
     207        engines[job.Id].Stop();
     208        JobData sJob = engines[job.Id].GetFinishedJob();
     209        job.Exception = engines[job.Id].CurrentException;
     210        job.ExecutionTime = engines[job.Id].ExecutionTime;
     211
     212        try {
     213          ClientCom.LogMessage("Sending the stoppped job with id: " + job.Id);
     214          wcfService.UpdateJob(job, sJob);
     215          SlaveStatusInfo.JobsProcessed++;    //TODO: count or not count, thats the question
     216        }
     217        catch (Exception e) {
     218          ClientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")");
     219        }
     220        finally {
     221          KillAppDomain(job.Id); // kill app-domain in every case         
     222        }
     223      }
     224    }
     225
     226    /// <summary>
     227    /// aborts all running jobs, no results are sent back
     228    /// </summary>
     229    private void DoAbortAll() {
     230      List<Guid> guids = new List<Guid>();
     231      foreach (Guid job in Jobs.Keys) {
     232        guids.Add(job);
     233      }
     234
     235      foreach (Guid g in guids) {
     236        KillAppDomain(g);
     237      }
     238
     239      ClientCom.LogMessage("Aborted all jobs!");
     240    }
     241
     242    /// <summary>
     243    /// pauses all running jobs by calling DoPauseJob for each of them
     244    /// </summary>
     245    private void DoPauseAll() {
     246      ClientCom.LogMessage("Pause all received");
     247
     248      //copy guids because there will be removed items from 'Jobs'
     249      List<Guid> guids = new List<Guid>();
     250      foreach (Guid job in Jobs.Keys) {
     251        guids.Add(job);
     252      }
     253
     254      foreach (Guid g in guids) {
     255        DoPauseJob(g);
     256      }
     257    }
     258
     259    /// <summary>
     260    /// stops all running jobs by calling DoStopJob for each of them
     261    /// </summary>
     262    private void DoStopAll() {
     263      ClientCom.LogMessage("Stop all received");
     264
     265      //copy guids because there will be removed items from 'Jobs'
     266      List<Guid> guids = new List<Guid>();
     267      foreach (Guid job in Jobs.Keys) {
     268        guids.Add(job);
     269      }
     270
     271      foreach (Guid g in guids) {
     272        DoStopJob(g);
     273      }
     274    }
     275
     276    /// <summary>
     277    /// completely shut down slave
     278    /// </summary>
     279    public void Shutdown() {
     280      MessageContainer mc = new MessageContainer(MessageContainer.MessageType.ShutdownSlave);
     281      MessageQueue.GetInstance().AddMessage(mc);
     282      waitShutdownSem.WaitOne();
     283    }
     284
     285    /// <summary>
     286    /// complete shutdown, should be called before the application is exited
     287    /// </summary>
     288    private void ShutdownCore() {
     289      ClientCom.LogMessage("Shutdown Signal received");
     290      ClientCom.LogMessage("Stopping heartbeat");
    227291      heartbeatManager.StopHeartBeat();
    228       WcfService.Instance.Disconnect();
    229       ClientCom.LogMessage("Soft pause done");
    230     }
    231 
    232     /// <summary>
    233     /// pause slave immediately
    234     /// </summary>
    235     public void HardPause() {
    236       MessageContainer mc = new MessageContainer(MessageContainer.MessageType.HardPause);
    237       MessageQueue.GetInstance().AddMessage(mc);
    238     }
    239 
    240     private void doHardPause() {
    241       ClientCom.LogMessage("Hard pause received");
    242       heartbeatManager.StopHeartBeat();
     292      abortRequested = true;
     293      ClientCom.LogMessage("Logging out");
     294
    243295
    244296      lock (engines) {
     
    251303      }
    252304      WcfService.Instance.Disconnect();
    253       ClientCom.LogMessage("Hard pause done");
    254     }
    255 
    256     public void Shutdown() {
    257       MessageContainer mc = new MessageContainer(MessageContainer.MessageType.ShutdownSlave);
    258       MessageQueue.GetInstance().AddMessage(mc);
    259       waitShutdownSem.WaitOne();
    260     }
    261 
    262     /// <summary>
    263     /// hard shutdown, should be called before the the application is exited
    264     /// </summary>
    265     private void ShutdownCore() {
    266       ClientCom.LogMessage("Shutdown Signal received");
    267       ClientCom.LogMessage("Stopping heartbeat");
    268       heartbeatManager.StopHeartBeat();
    269       abortRequested = true;
    270       ClientCom.LogMessage("Logging out");
    271 
    272 
    273       lock (engines) {
    274         ClientCom.LogMessage("engines locked");
    275         foreach (KeyValuePair<Guid, AppDomain> kvp in appDomains) {
    276           ClientCom.LogMessage("Shutting down Appdomain for " + kvp.Key);
    277           appDomains[kvp.Key].UnhandledException -= new UnhandledExceptionEventHandler(appDomain_UnhandledException);
    278           AppDomain.Unload(kvp.Value);
    279         }
    280       }
    281       WcfService.Instance.Disconnect();
    282305      ClientCom.Shutdown();
    283306      SlaveClientCom.Close();
     
    288311
    289312    /// <summary>
     313    /// reinitializes everything and continues operation,
     314    /// can be called after Sleep()
     315    /// </summary> 
     316    private void DoStartSlave() {
     317      ClientCom.LogMessage("Restart received");
     318      StartHeartbeats();
     319      ClientCom.LogMessage("Restart done");
     320    }
     321
     322    /// <summary>
     323    /// stop slave, except for client gui communication,
     324    /// primarily used by gui if core is running as windows service
     325    /// </summary>
     326    //TODO: do we need an AbortSleep?
     327    private void Sleep() {
     328      ClientCom.LogMessage("Sleep received");
     329      heartbeatManager.StopHeartBeat();
     330      DoStopAll();
     331      WcfService.Instance.Disconnect();
     332      ClientCom.LogMessage("Sleep done");
     333    }
     334
     335    /// <summary>
    290336    /// Pauses a job, which means sending it to the server and killing it locally;
    291337    /// atm only used when executor is waiting for child jobs
     
    293339    /// <param name="data"></param>
    294340    [MethodImpl(MethodImplOptions.Synchronized)]
    295     public void PauseJob(JobData data) {
     341    public void PauseWaitJob(JobData data) {
    296342      if (!Jobs.ContainsKey(data.JobId)) {
    297343        ClientCom.LogMessage("Can't find job with id " + data.JobId);
     
    377423              ClientCom.LogMessage("Created AppDomain");
    378424              engine.JobId = myJob.Id;
    379               engine.core = this;
     425              engine.Core = this;
    380426              ClientCom.LogMessage("Starting Engine for job " + myJob.Id);
    381427              engines.Add(myJob.Id, engine);
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Executor.cs

    r5314 r5450  
    3535    public IJob Job { get; set; }
    3636    private bool wasJobAborted = false;
    37     public Core core { get; set; }
     37    public Core Core { get; set; }
    3838
    3939    private Exception currentException;
     
    8484    }
    8585
    86     public void StartOnlyJob() {
    87       try {
    88         Job.Start();
    89       }
    90       catch (Exception e) {
    91         this.currentException = e;
    92       }
     86    public void Pause() {
     87      Job.Pause();
     88     
    9389    }
    9490
    95     public void Pause() {
    96       Job.Pause();
    97     }
    98 
    99     public void Abort() {
     91    public void Stop() {
    10092      wasJobAborted = true;
    10193      if ((ExecutionState == ExecutionState.Started) || (ExecutionState == ExecutionState.Paused)) {
     
    153145      jdata.JobId = this.JobId;
    154146
    155       core.PauseJob(jdata);
     147      Core.PauseWaitJob(jdata);
    156148    }
    157149
     
    164156      HeuristicLab.Common.EventArgs<Exception> ex = (HeuristicLab.Common.EventArgs<Exception>)e;
    165157      currentException = ex.Value;
    166       core.SendFinishedJob(JobId);
     158      Core.SendFinishedJob(JobId);
    167159    }
    168160
    169161    private void Job_JobStopped(object sender, EventArgs e) {
    170162      if (wasJobAborted) {
    171         core.KillAppDomain(JobId);
     163        Core.KillAppDomain(JobId);
    172164      } else {
    173         core.SendFinishedJob(JobId);
     165        Core.SendFinishedJob(JobId);
    174166      }
    175167    }
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/ServiceContracts/ISlaveCommunication.cs

    r5314 r5450  
    3434
    3535
    36     //commands from gui to slave core
    3736    [OperationContract]
    3837    void Restart();
    3938
    4039    [OperationContract]
    41     void HardPause();
     40    void PauseAll();
    4241
    4342    [OperationContract]
    44     void SoftPause();
     43    void StopAll();
     44
     45    [OperationContract]
     46    void AbortAll();
    4547
    4648    [OperationContract]
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/SlaveClientCom.cs

    r5314 r5450  
    2121
    2222using System.ServiceModel;
    23 using HeuristicLab.Clients.Hive.Slave;
    2423using HeuristicLab.Clients.Hive.Slave.ServiceContracts;
    2524
     
    4847
    4948    private SlaveClientCom() {
    50       setupClientCom();
     49      SetupClientCom();
    5150    }
    5251
    53     private void setupClientCom() {
     52    private void SetupClientCom() {
    5453      DummyListener dummy = new DummyListener();
    5554      pipeFactory = new DuplexChannelFactory<ISlaveCommunication>(
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/SlaveCommunicationService.cs

    r5402 r5450  
    2323using System.ServiceModel;
    2424using HeuristicLab.Clients.Hive.Slave.ServiceContracts;
     25using HeuristicLab.Services.Hive.Common;
    2526
    2627namespace HeuristicLab.Clients.Hive.Slave {
     
    8485
    8586    public void Restart() {
    86       Core.theCore.Restart();
    87     }
    88     public void HardPause() {
    89       Core.theCore.HardPause();
     87      MessageContainer mc = new MessageContainer(MessageContainer.MessageType.Restart);
     88      MessageQueue.GetInstance().AddMessage(mc);
    9089    }
    9190
    92     public void SoftPause() {
    93       Core.theCore.SoftPause();
     91    public void PauseAll() {
     92      MessageContainer mc = new MessageContainer(MessageContainer.MessageType.PauseAll);
     93      MessageQueue.GetInstance().AddMessage(mc);
     94    }
     95
     96    public void StopAll() {
     97      MessageContainer mc = new MessageContainer(MessageContainer.MessageType.StopAll);
     98      MessageQueue.GetInstance().AddMessage(mc);
    9499    }
    95100
    96101    public void ShutdownSlave() {
    97       Core.theCore.Shutdown();
     102      Core.TheCore.Shutdown();
     103    }
     104
     105    public void AbortAll() {
     106      MessageContainer mc = new MessageContainer(MessageContainer.MessageType.AbortAll);
     107      MessageQueue.GetInstance().AddMessage(mc);
    98108    }
    99109  }
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive/3.4/Jobs/OptimizerJob.cs

    r5363 r5450  
    155155
    156156    public void Pause() {
    157       throw new NotImplementedException();
     157      optimizer.Pause();
    158158    }
    159159
     
    169169    protected virtual void OnJobStopped() {
    170170      EventHandler handler = JobStopped;
     171      if (handler != null) handler(this, EventArgs.Empty);
     172    }
     173
     174    public event EventHandler JobPaused;
     175    protected void OnJobPaused(object sender, EventArgs e) {
     176      EventHandler handler = JobPaused;
    171177      if (handler != null) handler(this, EventArgs.Empty);
    172178    }
     
    212218    protected virtual void RegisterEvents() {
    213219      optimizer.Stopped += new EventHandler(optimizer_Stopped);
     220      optimizer.Paused += new EventHandler(OnJobPaused);
    214221      optimizer.ExceptionOccurred += new EventHandler<EventArgs<Exception>>(optimizer_ExceptionOccurred);
    215222      optimizer.DescriptionChanged += new EventHandler(optimizer_DescriptionChanged);
     
    219226      optimizer.ToStringChanged += new EventHandler(optimizer_ToStringChanged);
    220227    }
     228
    221229    protected virtual void DeregisterEvents() {
    222230      optimizer.Stopped -= new EventHandler(optimizer_Stopped);
     231      optimizer.Paused -= new EventHandler(OnJobPaused);
    223232      optimizer.ExceptionOccurred -= new EventHandler<EventArgs<Exception>>(optimizer_ExceptionOccurred);
    224233      optimizer.DescriptionChanged -= this.DescriptionChanged;
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Hive/3.4/IJob.cs

    r5103 r5450  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425using HeuristicLab.Core;
    25 using HeuristicLab.Common;
    2626
    2727namespace HeuristicLab.Hive {
     
    3131
    3232    ExecutionState ExecutionState { get; }
    33    
     33
    3434    /// <summary>
    3535    /// indicates whether it is possible to create childjobs from this job
     
    4747    /// </summary>
    4848    bool CollectChildJobs { get; set; }
    49    
     49
    5050    void Prepare();
    5151
     
    6767
    6868    event EventHandler JobStopped;
    69        
     69
     70    event EventHandler JobPaused;
     71
    7072    /// <summary>
    7173    /// When this event occurs the job wants to sleep until all his child jobs are finished
     
    8284    /// </summary>
    8385    event EventHandler DeleteChildJobs;
    84    
     86
    8587
    8688  }
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.Common/3.4/MessageContainer.cs

    r5404 r5450  
    3737      CalculateJob, // slave should calculate a job. the job is already assigned to the slave
    3838      StopJob,   // slave should stop the job and submit results
     39      StopAll,   // stop all and submit results
    3940      AbortJob,  // slave should shut the job down immediately without submitting results
    40       PauseJob,  // pause the job and submit the results
    41       SoftPause,      // slave should stop all jobs, submit results and pause
    42       HardPause,      // abort jobs and pause
     41      AbortAll,  // slave should abort all jobs immediately
     42      PauseJob,  // pause the job and submit the results   
     43      PauseAll,  // pause all jobs and submit results
    4344      Restart,        //restart operation after Soft/HardPause
    4445      ShutdownSlave,  // slave should shutdown immediately without submitting results
Note: See TracChangeset for help on using the changeset viewer.