using System;
using System.Linq;
using HeuristicLab.Services.Hive.Common;
using HeuristicLab.Services.Hive.Common.DataTransfer;

namespace HeuristicLab.Services.Hive {
  /// <summary>
  /// Housekeeping for the Hive service: takes slaves whose heartbeat timed out
  /// offline, puts their jobs (and jobs with stale heartbeats) back to waiting,
  /// and finishes the parent jobs returned by the DAO's parent-job query.
  /// </summary>
  public class LifecycleManager : ILifecycleManager {
    // All collaborators are resolved on every access instead of being cached in fields.
    private DataAccess.IHiveDao dao {
      get { return ServiceLocator.Instance.HiveDao; }
    }

    private HeuristicLab.Services.Hive.DataAccess.TransactionManager trans {
      get { return ServiceLocator.Instance.TransactionManager; }
    }

    private IAuthorizationManager auth {
      get { return ServiceLocator.Instance.AuthorizationManager; }
    }

    // The logger is looked up by the namespace of the runtime type.
    private ILogger log {
      get { return LogFactory.GetLogger(this.GetType().Namespace); }
    }

    /// <summary>
    /// Runs one cleanup pass: logs the call, then handles timed-out slaves,
    /// timed-out jobs and finishable parent jobs, in that order.
    /// </summary>
    public void Cleanup() {
      log.Log("LifecycleManager.Cleanup()");

      SetTimeoutSlavesOffline();
      SetTimeoutJobsWaiting();
      FinishParentJobs();
    }

    /// <summary>
    /// Visits every slave that is not already offline. A slave with no recorded
    /// heartbeat, or whose last heartbeat is older than
    /// ApplicationConstants.SlaveHeartbeatTimeout, is set offline, its
    /// calculating jobs are set back to waiting, and the slave is persisted.
    /// </summary>
    private void SetTimeoutSlavesOffline() {
      var onlineSlaves = dao.GetSlaves(x => x.SlaveState != SlaveState.Offline);

      foreach (Slave slave in onlineSlaves) {
        // NOTE(review): compares against local time (DateTime.Now) - presumably
        // heartbeats are stored in local time as well; confirm.
        bool hasTimedOut = !slave.LastHeartbeat.HasValue
          || (DateTime.Now - slave.LastHeartbeat.Value) > ApplicationConstants.SlaveHeartbeatTimeout;
        if (!hasTimedOut) {
          continue;
        }

        // The jobs are reset before the slave's new state is written.
        slave.SlaveState = SlaveState.Offline;
        SetJobsWaiting(slave.Id);
        dao.UpdateSlave(slave);
      }
    }

    /// <summary>
    /// Asks the DAO for parent jobs across the ids of all resources and sets the
    /// state of each returned job to finished.
    /// </summary>
    private void FinishParentJobs() {
      // The predicate accepts every resource, so all resource ids are passed on.
      var allResourceIds = dao.GetResources(x => true).Select(x => x.Id);
      // NOTE(review): the meaning of the "0" and "true" arguments is defined by
      // IHiveDao.GetParentJobs, which is not visible here - presumably a count
      // and the "finished" filter; confirm.
      var finishableParents = dao.GetParentJobs(allResourceIds, 0, true);

      foreach (var parent in finishableParents) {
        dao.UpdateJobState(parent.Id, JobState.Finished, null, null, string.Empty);
      }
    }

    /// <summary>
    /// Sets every calculating job whose latest state-log entry belongs to the
    /// given slave back to waiting and clears the job's command.
    /// </summary>
    /// <param name="slaveId">Id of the slave whose jobs are reset.</param>
    private void SetJobsWaiting(Guid slaveId) {
      var calculatingJobs = dao.GetJobs(x => x.State == JobState.Calculating);
      // NOTE(review): Last() throws on an empty sequence - this assumes every
      // calculating job has at least one state-log entry; confirm.
      var jobsOfSlave = calculatingJobs.Where(x => x.StateLog.Last().SlaveId == slaveId);

      foreach (var staleJob in jobsOfSlave) {
        Job job = dao.UpdateJobState(staleJob.Id, JobState.Waiting, slaveId, null, "Slave timed out.");
        job.Command = null;
        dao.UpdateJob(job);
      }
    }

    /// <summary>
    /// Finds calculating or transferring jobs whose last heartbeat is older than
    /// the timeout for their state, sets them back to waiting and clears their
    /// command.
    /// </summary>
    private void SetTimeoutJobsWaiting() {
      // The predicate is handed to the DAO as a whole, so the time comparison
      // (including DateTime.Now) deliberately stays inside the lambda.
      var timedOutJobs = dao.GetJobs(x =>
        (x.State == JobState.Calculating && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.CalculatingJobHeartbeatTimeout)
        || (x.State == JobState.Transferring && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.TransferringJobHeartbeatTimeout));

      foreach (var staleJob in timedOutJobs) {
        Job job = dao.UpdateJobState(staleJob.Id, JobState.Waiting, null, null, "Slave timed out.");
        job.Command = null;
        dao.UpdateJob(job);
      }
    }
  }
}