#region License Information
/* HeuristicLab
* Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Services.Hive.Common;
using HeuristicLab.Services.Hive.Common.DataTransfer;
namespace HeuristicLab.Services.Hive {
/// <summary>
/// Handles the heartbeats sent by slaves: stores the reported slave data, synchronizes the
/// progress of the jobs the slave reports and answers with the messages the slave should act on.
/// </summary>
public class HeartbeatManager {
  // The DAO is resolved through the service locator on every access instead of being cached in a field.
  private DataAccess.IHiveDao dao {
    get { return ServiceLocator.Instance.HiveDao; }
  }

  private IAuthorizationManager auth {
    get { return ServiceLocator.Instance.AuthorizationManager; }
  }

  /// <summary>
  /// This method will be called every time a slave sends a heartbeat (i.e. very often; concurrency is important!)
  /// </summary>
  /// <param name="heartbeat">the heartbeat received from the slave</param>
  /// <returns>a list of actions the slave should do</returns>
  public List<MessageContainer> ProcessHeartbeat(Heartbeat heartbeat) {
    List<MessageContainer> actions = new List<MessageContainer>();

    Slave slave = dao.GetSlave(heartbeat.SlaveId);
    if (slave == null) {
      // the slave is not known to the DAO; answer with SayHello only
      actions.Add(new MessageContainer(MessageContainer.MessageType.SayHello));
      return actions;
    }

    // update slave data
    slave.FreeCores = heartbeat.FreeCores;
    slave.FreeMemory = heartbeat.FreeMemory;
    slave.CpuUtilization = heartbeat.CpuUtilization;
    slave.IsAllowedToCalculate = SlaveIsAllowedToCalculate(slave.Id);
    slave.SlaveState = (heartbeat.JobProgress != null && heartbeat.JobProgress.Count > 0) ? SlaveState.Calculating : SlaveState.Idle;
    slave.LastHeartbeat = DateTime.Now;
    dao.UpdateSlave(slave);

    // update job data
    actions.AddRange(UpdateJobs(heartbeat, slave.IsAllowedToCalculate));

    // assign new job
    if (heartbeat.AssignJob && slave.IsAllowedToCalculate && heartbeat.FreeCores > 0) {
      // FirstOrDefault walks the result once; Count() followed by First() enumerated it twice
      var job = dao.GetWaitingJobs(slave, 1).FirstOrDefault();
      if (job != null && AssignJob(slave, job))
        actions.Add(new MessageContainer(MessageContainer.MessageType.CalculateJob, job.Id));
    }

    return actions;
  }

  /// <summary>
  /// Tries to hand the given job over to the given slave by moving it into the Transferring state.
  /// </summary>
  /// <returns>true if assignment was successful</returns>
  private bool AssignJob(Slave slave, Job job) {
    // load job again and check if it is still available (this is an attempt to reduce the race condition which causes multiple heartbeats to get the same job assigned)
    Job currentJob = dao.GetJob(job.Id);
    // the job may have disappeared in the meantime; treat that like "no longer waiting"
    if (currentJob == null || currentJob.State != JobState.Waiting) return false;

    job = dao.UpdateJobState(job.Id, JobState.Transferring, slave.Id, null, null);

    // from now on the job has some time to send the next heartbeat (ApplicationConstants.TransferringJobHeartbeatTimeout)
    job.LastHeartbeat = DateTime.Now;
    dao.UpdateJob(job);
    return true;
  }

  /// <summary>
  /// Update the progress of each job
  /// Checks if all the jobs sent by heartbeat are supposed to be calculated by this slave
  /// </summary>
  /// <param name="heartbeat">the heartbeat whose JobProgress entries are processed</param>
  /// <param name="IsAllowedToCalculate">if false, a single PauseAll message is returned and no job is touched</param>
  /// <returns>the messages resulting from the reported job progress (may be empty)</returns>
  private IEnumerable<MessageContainer> UpdateJobs(Heartbeat heartbeat, bool IsAllowedToCalculate) {
    List<MessageContainer> actions = new List<MessageContainer>();

    if (heartbeat.JobProgress == null)
      return actions;

    if (!IsAllowedToCalculate) {
      actions.Add(new MessageContainer(MessageContainer.MessageType.PauseAll));
      return actions;
    }

    // process the jobProgresses
    foreach (var jobProgress in heartbeat.JobProgress) {
      Job curJob = dao.GetJob(jobProgress.Key);
      if (curJob == null) {
        // job does not exist in db
        actions.Add(new MessageContainer(MessageContainer.MessageType.AbortJob, jobProgress.Key));
        LogFactory.GetLogger(this.GetType().Namespace).Log("Job does not exist in DB: " + jobProgress.Key);
      } else if (curJob.CurrentStateLog.SlaveId == Guid.Empty || curJob.CurrentStateLog.SlaveId != heartbeat.SlaveId) {
        // assigned slave does not match heartbeat
        actions.Add(new MessageContainer(MessageContainer.MessageType.AbortJob, curJob.Id));
        LogFactory.GetLogger(this.GetType().Namespace).Log("The slave " + heartbeat.SlaveId + " is not supposed to calculate Job: " + curJob);
      } else if (!JobIsAllowedToBeCalculatedBySlave(heartbeat.SlaveId, curJob)) {
        // assigned resources ids of job do not match with slaveId (and parent resourceGroupIds); this might happen when slave is moved to different group
        actions.Add(new MessageContainer(MessageContainer.MessageType.PauseJob, curJob.Id));
      } else {
        // save job execution time
        curJob.ExecutionTime = jobProgress.Value;
        curJob.LastHeartbeat = DateTime.Now;

        // forward a pending command of the job to the slave; any other value produces no message
        switch (curJob.Command) {
          case Command.Stop:
            actions.Add(new MessageContainer(MessageContainer.MessageType.StopJob, curJob.Id));
            break;
          case Command.Pause:
            actions.Add(new MessageContainer(MessageContainer.MessageType.PauseJob, curJob.Id));
            break;
          case Command.Abort:
            actions.Add(new MessageContainer(MessageContainer.MessageType.AbortJob, curJob.Id));
            break;
        }
        dao.UpdateJob(curJob);
      }
    }
    return actions;
  }

  /// <summary>
  /// Checks whether at least one resource assigned to the job is among the parent resources
  /// returned by the DAO for the given slave.
  /// </summary>
  private bool JobIsAllowedToBeCalculatedBySlave(Guid slaveId, Job curJob) {
    var assignedResourceIds = dao.GetAssignedResources(curJob.Id).Select(x => x.Id);
    var slaveResourceIds = dao.GetParentResources(slaveId).Select(x => x.Id);
    // Intersect reads the slave's resource ids once instead of re-enumerating them for every assigned id
    return assignedResourceIds.Intersect(slaveResourceIds).Any();
  }

  /// <summary>
  /// Checks whether none of the parent resources returned by the DAO for the slave has an
  /// appointment that covers the current point in time.
  /// </summary>
  private bool SlaveIsAllowedToCalculate(Guid slaveId) {
    // appointment==doNotCompute! the slave may only calculate if there is no appointment right now. this needs to be checked for every parent resource also
    // DateTime.Now is intentionally kept inside the predicate so that it is evaluated exactly as before by the DAO
    return dao.GetParentResources(slaveId).All(r => !dao.GetAppointments(x => x.ResourceId == r.Id && (DateTime.Now >= x.StartDate) && (DateTime.Now <= x.EndDate)).Any());
  }
}
}