Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive/3.4/LifecycleManager.cs @ 6229

Last change on this file since 6229 was 6229, checked in by cneumuel, 13 years ago

#1233

  • added basic statistics recording (once per minute) for:
    • executiontime per user
    • usedcores, usedmemory per slave
File size: 4.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Services.Hive.Common;
5using HeuristicLab.Services.Hive.Common.DataTransfer;
6
namespace HeuristicLab.Services.Hive {
  /// <summary>
  /// Maintenance routines for the Hive service: takes timed-out slaves offline, puts the
  /// jobs of timed-out slaves back into the waiting state, finishes parent jobs and stores
  /// a statistics snapshot.
  /// </summary>
  public class LifecycleManager : ILifecycleManager {
    /// <summary>Data access object, resolved through the service locator on every access.</summary>
    private DataAccess.IHiveDao dao {
      get { return ServiceLocator.Instance.HiveDao; }
    }
    /// <summary>Transaction manager from the service locator (not used by the methods of this class).</summary>
    private HeuristicLab.Services.Hive.DataAccess.TransactionManager trans {
      get { return ServiceLocator.Instance.TransactionManager; }
    }
    /// <summary>Authorization manager from the service locator (not used by the methods of this class).</summary>
    private IAuthorizationManager auth {
      get { return ServiceLocator.Instance.AuthorizationManager; }
    }
    /// <summary>Logger obtained from the log factory, keyed by the namespace of this type.</summary>
    private ILogger log {
      get { return LogFactory.GetLogger(this.GetType().Namespace); }
    }

    /// <summary>
    /// Runs all maintenance steps in a fixed order: timed-out slaves are set offline,
    /// timed-out jobs are set to waiting, parent jobs are finished and finally a statistics
    /// snapshot is stored. An exception in one step prevents the following steps from running.
    /// </summary>
    public void Cleanup() {
      log.Log("LifecycleManager.Cleanup()");
      SetTimeoutSlavesOffline();
      SetTimeoutJobsWaiting();
      FinishParentJobs();
      UpdateStatistics();
    }

    /// <summary>
    /// Stores one statistics snapshot consisting of an entry for every slave that is
    /// currently calculating or idle and the user statistics delivered by the DAO.
    /// </summary>
    private void UpdateStatistics() {
      var slaves = dao.GetSlaves(x => x.SlaveState == SlaveState.Calculating || x.SlaveState == SlaveState.Idle);

      var stats = new Statistics();
      stats.TimeStamp = DateTime.Now;

      var slaveStats = new List<SlaveStatistics>();
      foreach (var slave in slaves) {
        // values a slave has not reported are recorded as 0
        slaveStats.Add(new SlaveStatistics() {
          SlaveId = slave.Id,
          Cores = slave.Cores.GetValueOrDefault(),
          FreeCores = slave.FreeCores.GetValueOrDefault(),
          Memory = slave.Memory.GetValueOrDefault(),
          FreeMemory = slave.FreeMemory.GetValueOrDefault(),
          CpuUtilization = slave.CpuUtilization
        });
      }
      stats.SlaveStatistics = slaveStats;

      stats.UserStatistics = dao.GetUserStatistics();

      dao.AddStatistics(stats);
    }

    /// <summary>
    /// Searches for slaves which are timed out, puts them and their jobs offline
    /// </summary>
    private void SetTimeoutSlavesOffline() {
      // one reference time for the whole pass, so every slave is judged against the same instant
      DateTime now = DateTime.Now;
      var slaves = dao.GetSlaves(x => x.SlaveState != SlaveState.Offline);
      foreach (Slave slave in slaves) {
        // a slave that never sent a heartbeat is treated like a timed-out one
        bool timedOut = !slave.LastHeartbeat.HasValue
                        || (now - slave.LastHeartbeat.Value) > ApplicationConstants.SlaveHeartbeatTimeout;
        if (timedOut) {
          slave.SlaveState = SlaveState.Offline;
          SetJobsWaiting(slave.Id);
          dao.UpdateSlave(slave);
        }
      }
    }

    /// <summary>
    /// Looks for parent jobs which have FinishWhenChildJobsFinished and set their state to finished
    /// </summary>
    private void FinishParentJobs() {
      // the query is made over the ids of all resources known to the DAO
      var resourceIds = dao.GetResources(x => true).Select(x => x.Id);
      var parentJobsToFinish = dao.GetParentJobs(resourceIds, 0, true);
      foreach (var job in parentJobsToFinish) {
        dao.UpdateJobState(job.Id, JobState.Finished, null, null, string.Empty);
      }
    }

    /// <summary>
    /// Sets every calculating job whose last state-log entry belongs to the given slave
    /// back to waiting and clears its pending command.
    /// </summary>
    /// <param name="slaveId">Id of the slave whose jobs are rescheduled.</param>
    private void SetJobsWaiting(Guid slaveId) {
      // Any() guards Last(): a job without state-log entries would otherwise throw
      // InvalidOperationException and abort the whole cleanup run
      var jobs = dao.GetJobs(x => x.State == JobState.Calculating)
                    .Where(x => x.StateLog.Any() && x.StateLog.Last().SlaveId == slaveId);
      foreach (var j in jobs) {
        Job job = dao.UpdateJobState(j.Id, JobState.Waiting, slaveId, null, "Slave timed out.");
        job.Command = null;
        dao.UpdateJob(job);
      }
    }

    /// <summary>
    /// Looks for jobs which have not sent heartbeats for some time and reschedules them for calculation
    /// </summary>
    private void SetTimeoutJobsWaiting() {
      // calculating and transferring jobs each have their own heartbeat timeout
      var jobs = dao.GetJobs(x => (x.State == JobState.Calculating && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.CalculatingJobHeartbeatTimeout)
                               || (x.State == JobState.Transferring && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.TransferringJobHeartbeatTimeout));
      foreach (var j in jobs) {
        Job job = dao.UpdateJobState(j.Id, JobState.Waiting, null, null, "Slave timed out.");
        job.Command = null;
        dao.UpdateJob(job);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.