source: trunk/sources/HeuristicLab.Services.Hive/3.3/Manager/EventManager.cs @ 7259

Last change on this file since 7259 was 7259, checked in by swagner, 8 years ago

Updated year of copyrights to 2012 (#1716)

File size: 5.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Services.Hive.DataAccess;
26using DT = HeuristicLab.Services.Hive.DataTransfer;
27
28
29namespace HeuristicLab.Services.Hive {
30  /// <summary>
31  /// This class offers methods for cleaning up offline slaves and task
32  /// </summary>
33  public class EventManager : IEventManager {
34    private IHiveDao dao {
35      get { return ServiceLocator.Instance.HiveDao; }
36    }
37    private IAuthorizationManager auth {
38      get { return ServiceLocator.Instance.AuthorizationManager; }
39    }
40    private ILogger log {
41      get { return LogFactory.GetLogger(this.GetType().Namespace); }
42    }
43    private DataAccess.ITransactionManager trans {
44      get { return ServiceLocator.Instance.TransactionManager; }
45    }
46
47    public void Cleanup() { 
48      trans.UseTransaction(() => {
49        SetTimeoutSlavesOffline();
50        SetTimeoutTasksWaiting();
51      }, true);
52
53      trans.UseTransaction(() => {
54        FinishParentTasks();
55        UpdateStatistics();
56      }, false);
57    }
58
59    private void UpdateStatistics() {
60      var slaves = dao.GetSlaves(x => x.SlaveState == SlaveState.Calculating || x.SlaveState == SlaveState.Idle);
61
62      var stats = new DataTransfer.Statistics();
63      stats.TimeStamp = DateTime.Now;
64      var slaveStats = new List<DT.SlaveStatistics>();
65      foreach (var slave in slaves) {
66        slaveStats.Add(new DT.SlaveStatistics() {
67          SlaveId = slave.Id,
68          Cores = slave.Cores.HasValue ? slave.Cores.Value : 0,
69          FreeCores = slave.FreeCores.HasValue ? slave.FreeCores.Value : 0,
70          Memory = slave.Memory.HasValue ? slave.Memory.Value : 0,
71          FreeMemory = slave.FreeMemory.HasValue ? slave.FreeMemory.Value : 0,
72          CpuUtilization = slave.CpuUtilization
73        });
74      }
75      stats.SlaveStatistics = slaveStats;
76      //collecting user statistics slows down the db and results in timeouts.
77      //we have to find another way to deal with this. 
78      //until then the next line is commented out...
79      //stats.UserStatistics = dao.GetUserStatistics();
80      dao.AddStatistics(stats);
81    }
82
83    /// <summary>
84    /// Searches for slaves which are timed out, puts them and their task offline
85    /// </summary>
86    private void SetTimeoutSlavesOffline() {
87      var slaves = dao.GetSlaves(x => x.SlaveState != SlaveState.Offline);
88      foreach (DT.Slave slave in slaves) {
89        if (!slave.LastHeartbeat.HasValue || (DateTime.Now - slave.LastHeartbeat.Value) > HeuristicLab.Services.Hive.Properties.Settings.Default.SlaveHeartbeatTimeout) {
90          slave.SlaveState = DT.SlaveState.Offline;
91          SetTasksWaiting(slave.Id);
92          dao.UpdateSlave(slave);
93        }
94      }
95    }
96
97    /// <summary>
98    /// Looks for parent tasks which have FinishWhenChildJobsFinished and set their state to finished
99    /// </summary>
100    private void FinishParentTasks() {
101      var parentTasksToFinish = dao.GetParentTasks(dao.GetResources(x => true).Select(x => x.Id), 0, true);
102      foreach (var task in parentTasksToFinish) {
103        dao.UpdateTaskState(task.Id, TaskState.Finished, null, null, string.Empty);
104      }
105    }
106
107    private void SetTasksWaiting(Guid slaveId) {
108      var tasks = dao.GetTasks(x => x.State == TaskState.Calculating).Where(x => x.StateLog.Last().SlaveId == slaveId);
109      foreach (var j in tasks) {
110        DT.Task task = dao.UpdateTaskState(j.Id, TaskState.Waiting, slaveId, null, "Slave timed out.");
111        task.Command = null;
112        dao.UpdateTask(task);
113      }
114    }
115
116    /// <summary>
117    /// Looks for task which have not sent heartbeats for some time and reschedules them for calculation
118    /// </summary>
119    private void SetTimeoutTasksWaiting() {
120      var tasks = dao.GetTasks(x => (x.State == TaskState.Calculating && (DateTime.Now - x.LastHeartbeat) > HeuristicLab.Services.Hive.Properties.Settings.Default.CalculatingJobHeartbeatTimeout)
121                               || (x.State == TaskState.Transferring && (DateTime.Now - x.LastHeartbeat) > HeuristicLab.Services.Hive.Properties.Settings.Default.TransferringJobHeartbeatTimeout));
122      foreach (var j in tasks) {
123        DT.Task task = dao.UpdateTaskState(j.Id, TaskState.Waiting, null, null, "Slave timed out.");
124        task.Command = null;
125        dao.UpdateTask(task);
126      }
127    }
128  }
129}
Note: See TracBrowser for help on using the repository browser.