source: branches/HiveProjectManagement/HeuristicLab.Services.Hive/3.3/Manager/EventManager.cs @ 15630

Last change on this file since 15630 was 15630, checked in by jzenisek, 3 years ago

#2839

  • updated Heartbeat processing (regarding: checking against AssignedJobResources and handling of the updated Job deletion routine)
  • updated Job deletion routine (still in progress at GenerateStatistics)
File size: 5.1 KB
RevLine 
[12853]1#region License Information
2/* HeuristicLab
[14185]3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[12853]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[12789]23using System.Linq;
24using HeuristicLab.Services.Hive.DataAccess;
25using HeuristicLab.Services.Hive.DataAccess.Interfaces;
26
27namespace HeuristicLab.Services.Hive.Manager {
public class EventManager : IEventManager {
  // Text stored in the Exception field of the state log when a task is rescheduled after a timeout.
  private const string SlaveTimeout = "Slave timed out.";

  /// <summary>
  /// Persistence manager, resolved through the service locator on every access.
  /// </summary>
  private IPersistenceManager PersistenceManager {
    get { return ServiceLocator.Instance.PersistenceManager; }
  }

  /// <summary>
  /// Runs the housekeeping steps in three separate UseTransaction calls, in this order:
  /// job deletion; slave/task timeout handling plus removal of obsolete slaves;
  /// finishing of parent tasks. Each call ends with SubmitChanges.
  /// </summary>
  public void Cleanup() {
    var persistence = PersistenceManager;

    RunInTransaction(persistence, FinishJobDeletion);
    RunInTransaction(persistence, SetTimeoutSlavesOffline, SetTimeoutTasksWaiting, DeleteObsoleteSlaves);
    RunInTransaction(persistence, FinishParentTasks);
  }

  /// <summary>
  /// Executes the given steps one after another inside a single UseTransaction call
  /// and submits the pending changes once all steps have run.
  /// </summary>
  private static void RunInTransaction(IPersistenceManager pm, params Action<IPersistenceManager>[] steps) {
    pm.UseTransaction(() => {
      foreach (var step in steps) {
        step(pm);
      }
      pm.SubmitChanges();
    });
  }

  /// <summary>
  /// Builds a state log entry without slave or user, stamped with the current local time.
  /// </summary>
  private static StateLog CreateSystemStateLog(TaskState state, string exception) {
    var entry = new StateLog();
    entry.State = state;
    entry.SlaveId = null;
    entry.UserId = null;
    entry.Exception = exception;
    entry.DateTime = DateTime.Now;
    return entry;
  }

  /// <summary>
  /// Asks the job DAO to delete every job in state "DeletionPending".
  /// According to the original author this includes the corresponding tasks, and the
  /// state is set by HiveJanitor > StatisticsGenerator (neither is visible in this class).
  /// </summary>
  private void FinishJobDeletion(IPersistenceManager pm) {
    pm.JobDao.DeleteByState(JobState.DeletionPending);
  }

  /// <summary>
  /// Sets every slave returned by GetOnlineSlaves to Offline if it has never sent a heartbeat
  /// or its last heartbeat is older than the SlaveHeartbeatTimeout setting.
  /// Only the slave state is changed here; timed-out tasks are handled by SetTimeoutTasksWaiting.
  /// </summary>
  private void SetTimeoutSlavesOffline(IPersistenceManager pm) {
    foreach (var candidate in pm.SlaveDao.GetOnlineSlaves()) {
      var lastSeen = candidate.LastHeartbeat;
      // A slave that reported recently enough stays online.
      if (lastSeen.HasValue
          && (DateTime.Now - lastSeen.Value) <= Properties.Settings.Default.SlaveHeartbeatTimeout) {
        continue;
      }
      candidate.SlaveState = SlaveState.Offline;
    }
  }

  /// <summary>
  /// Sets the parent tasks selected by GetParentTasks to Finished and records a state log entry
  /// for each. The original author describes the selection as parent tasks which have
  /// FinishWhenChildJobsFinished; the exact criteria live in the task DAO.
  /// </summary>
  private void FinishParentTasks(IPersistenceManager pm) {
    var resources = pm.ResourceDao;
    var tasks = pm.TaskDao;
    var allResourceIds = resources.GetAll().Select(r => r.ResourceId).ToList();
    foreach (var parent in tasks.GetParentTasks(allResourceIds, 0, true)) {
      parent.State = TaskState.Finished;
      parent.StateLogs.Add(CreateSystemStateLog(parent.State, string.Empty));
    }
  }

  /// <summary>
  /// Puts Calculating and Transferring tasks whose last heartbeat exceeds the respective
  /// timeout setting back to Waiting, logs the timeout and clears any pending command.
  /// </summary>
  private void SetTimeoutTasksWaiting(IPersistenceManager pm) {
    var overdue = pm.TaskDao.GetAll().Where(t => (t.State == TaskState.Calculating && (DateTime.Now - t.LastHeartbeat) > Properties.Settings.Default.CalculatingJobHeartbeatTimeout)
                                              || (t.State == TaskState.Transferring && (DateTime.Now - t.LastHeartbeat) > Properties.Settings.Default.TransferringJobHeartbeatTimeout));
    foreach (var stalled in overdue) {
      stalled.State = TaskState.Waiting;
      stalled.StateLogs.Add(CreateSystemStateLog(stalled.State, SlaveTimeout));
      stalled.Command = null;
    }
  }

  /// <summary>
  /// Deletes disposable slaves that are Offline and whose last heartbeat is older than the
  /// SweepInterval setting, provided no downtimes are registered for them.
  /// </summary>
  private void DeleteObsoleteSlaves(IPersistenceManager pm) {
    var slaves = pm.SlaveDao;
    var downtimes = pm.DowntimeDao;
    var candidateIds = slaves.GetAll()
      .Where(s => s.IsDisposable.GetValueOrDefault()
               && s.SlaveState == SlaveState.Offline
               && (DateTime.Now - s.LastHeartbeat) > Properties.Settings.Default.SweepInterval)
      .Select(s => s.ResourceId)
      .ToList();
    foreach (var slaveId in candidateIds) {
      // Slaves that still have downtimes attached are kept.
      if (downtimes.GetByResourceId(slaveId).Any()) {
        continue;
      }
      slaves.Delete(slaveId);
    }
  }
}
139}
Note: See TracBrowser for help on using the repository browser.