source: branches/HiveStatistics/sources/HeuristicLab.Services.Hive/3.3/Manager/EventManager.cs @ 12468

Last change on this file since 12468 was 12468, checked in by dglaser, 6 years ago

#2388:
HeuristicLab.Services.Hive.DataAccess-3.3:

  • Added PersistenceManager with corresponding daos
  • Updated SQL Scripts
  • Fixed folder structure (interfaces, manager)
  • Removed duplicated IHiveDao and HiveDao (the HiveDao/OptimizedHiveDao that is actually used is located in HeuristicLab.Services.Hive)

HeuristicLab.Services.Hive-3.3:

  • Added PersistenceManager to the ServiceLocator
  • Updated and improved the HiveStatisticsGenerator
  • Updated HiveJanitor
File size: 5.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Services.Hive.DataAccess;
26using HeuristicLab.Services.Hive.DataAccess.Interfaces;
27using DT = HeuristicLab.Services.Hive.DataTransfer;
28
29
namespace HeuristicLab.Services.Hive {
  /// <summary>
  /// Housekeeping routines of the Hive service: marks timed-out slaves as offline,
  /// puts timed-out tasks back into the waiting state, deletes obsolete disposable
  /// slaves, finishes parent tasks and records a slave statistics entry.
  /// </summary>
  public class EventManager : IEventManager {
    // All collaborators are looked up on every access instead of being cached in fields.
    private IHiveDao dao { get { return ServiceLocator.Instance.HiveDao; } }

    private IAuthorizationManager auth { get { return ServiceLocator.Instance.AuthorizationManager; } }

    private ILogger log { get { return LogFactory.GetLogger(GetType().Namespace); } }

    private ITransactionManager trans { get { return ServiceLocator.Instance.TransactionManager; } }

    /// <summary>
    /// Runs all cleanup steps, split across two separate UseTransaction calls:
    /// first the slave/task timeout handling and slave deletion, then the
    /// finishing of parent tasks and the statistics update.
    /// </summary>
    public void Cleanup() {
      // Step 1: timeouts and removal of obsolete slaves.
      trans.UseTransaction(() => {
        SetTimeoutSlavesOffline();
        SetTimeoutTasksWaiting();
        DeleteObsoleteSlaves();
      });

      // Step 2: parent task completion and statistics.
      trans.UseTransaction(() => {
        FinishParentTasks();
        UpdateStatistics();
      });
    }

    /// <summary>
    /// Stores a time-stamped statistics entry containing one record per slave
    /// that is currently in the Calculating or Idle state.
    /// </summary>
    private void UpdateStatistics() {
      var activeSlaves = dao.GetSlaves(s => s.SlaveState == SlaveState.Calculating || s.SlaveState == SlaveState.Idle);

      var snapshot = new DT.Statistics();
      snapshot.TimeStamp = DateTime.Now;

      var perSlave = new List<DT.SlaveStatistics>();
      foreach (var current in activeSlaves) {
        // Missing (null) hardware figures are reported as 0.
        var entry = new DT.SlaveStatistics();
        entry.SlaveId = current.Id;
        entry.Cores = current.Cores.GetValueOrDefault();
        entry.FreeCores = current.FreeCores.GetValueOrDefault();
        entry.Memory = current.Memory.GetValueOrDefault();
        entry.FreeMemory = current.FreeMemory.GetValueOrDefault();
        entry.CpuUtilization = current.CpuUtilization;
        perSlave.Add(entry);
      }
      snapshot.SlaveStatistics = perSlave;

      // User statistics are intentionally not collected: gathering them slows
      // down the database and results in timeouts. Until another way is found,
      // the following assignment stays disabled:
      //   stats.UserStatistics = dtoDao.GetUserStatistics();
      dao.AddStatistics(snapshot);
    }

    /// <summary>
    /// Marks every non-offline slave as offline if it has never sent a heartbeat
    /// or if its last heartbeat is older than the SlaveHeartbeatTimeout setting.
    /// </summary>
    private void SetTimeoutSlavesOffline() {
      var candidates = dao.GetSlaves(s => s.SlaveState != SlaveState.Offline);
      foreach (DT.Slave candidate in candidates) {
        bool timedOut = !candidate.LastHeartbeat.HasValue
          || (DateTime.Now - candidate.LastHeartbeat.Value) > Hive.Properties.Settings.Default.SlaveHeartbeatTimeout;
        if (!timedOut) {
          continue;
        }

        candidate.SlaveState = DT.SlaveState.Offline;
        dao.UpdateSlave(candidate);
      }
    }

    /// <summary>
    /// Sets the state of all parent tasks returned by the dao (queried across
    /// the ids of all resources) to Finished.
    /// </summary>
    private void FinishParentTasks() {
      // NOTE(review): the meaning of the arguments "0" and "true" is defined by
      // IHiveDao.GetParentTasks and is not visible here - confirm against the dao.
      var allResourceIds = dao.GetResources(r => true).Select(r => r.Id);
      var finishableParents = dao.GetParentTasks(allResourceIds, 0, true);
      foreach (var parent in finishableParents) {
        dao.UpdateTaskState(parent.Id, TaskState.Finished, null, null, string.Empty);
      }
    }

    /// <summary>
    /// Finds Calculating and Transferring tasks whose last heartbeat is older than
    /// the respective timeout setting, sets them back to Waiting and clears their command.
    /// </summary>
    private void SetTimeoutTasksWaiting() {
      var stalledTasks = dao.GetTasks(t =>
        (t.State == TaskState.Calculating && (DateTime.Now - t.LastHeartbeat) > Hive.Properties.Settings.Default.CalculatingJobHeartbeatTimeout)
        || (t.State == TaskState.Transferring && (DateTime.Now - t.LastHeartbeat) > Hive.Properties.Settings.Default.TransferringJobHeartbeatTimeout));

      foreach (var stalled in stalledTasks) {
        // The state change returns the task object, which is then
        // stripped of its command and saved again.
        DT.Task rescheduled = dao.UpdateTaskState(stalled.Id, TaskState.Waiting, null, null, "Slave timed out.");
        rescheduled.Command = null;
        dao.UpdateTask(rescheduled);
      }
    }

    /// <summary>
    /// Deletes disposable slaves that are offline and whose last heartbeat is older
    /// than the SweepInterval setting, unless downtimes exist for the slave.
    /// </summary>
    private void DeleteObsoleteSlaves() {
      // The ids are copied into an array before any slave is deleted.
      Guid[] obsoleteIds = dao.GetSlaves(s => s.IsDisposable.GetValueOrDefault() &&
                                              s.SlaveState == SlaveState.Offline &&
                                              (DateTime.Now - s.LastHeartbeat) > Hive.Properties.Settings.Default.SweepInterval)
                              .Select(s => s.Id)
                              .ToArray();

      foreach (Guid slaveId in obsoleteIds) {
        // Slaves that still have downtimes assigned are kept.
        bool hasDowntimes = dao.GetDowntimes(d => d.ResourceId == slaveId).Any();
        if (hasDowntimes) {
          continue;
        }

        dao.DeleteSlave(slaveId);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.