Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/22/11 11:36:53 (13 years ago)
Author:
cneumuel
Message:

#1233

  • implemented correct numbering of BatchRuns
  • improvements in ExperimentManager
  • fixed bug in server (jobs were scheduled multiple times)
  • added exception handling for task in slave
  • improved timeout handling of jobs (LifecycleManager)
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive/3.4/LifecycleManager.cs

    r5779 r5786  
    25 25      log.Log("LifecycleManager.Cleanup()");
    26 26      SetTimeoutSlavesOffline();
    27       FinishParentJobs();
     27      SetTimeoutJobsWaiting();
     28      FinishParentJobs();     
    28 29    }
    29 30
     
    34 35      var slaves = dao.GetSlaves(x => x.SlaveState != SlaveState.Offline);
    35 36      foreach (Slave slave in slaves) {
    36         if (!slave.LastHeartbeat.HasValue || (DateTime.Now - slave.LastHeartbeat.Value).TotalSeconds > ApplicationConstants.HeartbeatTimeout) {
     37        if (!slave.LastHeartbeat.HasValue || (DateTime.Now - slave.LastHeartbeat.Value) > ApplicationConstants.SlaveHeartbeatTimeout) {
    37 38          slave.SlaveState = SlaveState.Offline;
    38 39          SetJobsWaiting(slave.Id);
     
    55 56      var jobs = dao.GetJobs(x => x.State == JobState.Calculating).Where(x => x.StateLog.Last().SlaveId == slaveId);
    56 57      foreach (var j in jobs) {
    57         Job job = dao.UpdateJobState(j.Id, JobState.Waiting, slaveId, null, "Slave timed out");
     58        Job job = dao.UpdateJobState(j.Id, JobState.Waiting, slaveId, null, "Slave timed out.");
    58 59        job.Command = null;
    59 60        dao.UpdateJob(job);
     
    61 62    }
    62 63
     64    /// <summary>
     65    /// Looks for jobs which have not sent heartbeats for some time and reschedules them for calculation
     66    /// </summary>
     67    private void SetTimeoutJobsWaiting() {
     68      var jobs = dao.GetJobs(x => (x.State == JobState.Calculating && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.CalculatingJobHeartbeatTimeout)
     69                               || (x.State == JobState.Transferring && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.TransferringJobHeartbeatTimeout));
     70      foreach (var j in jobs) {
     71        Job job = dao.UpdateJobState(j.Id, JobState.Waiting, null, null, "Slave timed out.");
     72        job.Command = null;
     73        dao.UpdateJob(job);
     74      }
     75    }
    63 76  }
    64 77}
Note: See TracChangeset for help on using the changeset viewer.