Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HiveStatistics/sources/HeuristicLab.Services.Hive/3.3/HiveStatisticsGenerator.cs @ 9646

Last change on this file since 9646 was 9646, checked in by pfleck, 11 years ago

#2063:
Changed year in headers from 2012 to 2013.
Minor reformattings in chart helpers.
Refactored and simplified some statistics generator methods.

File size: 12.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Data.Linq;
4using System.Linq;
5using System.Transactions;
6using HeuristicLab.Services.Access;
7using HeuristicLab.Services.Hive.DataAccess;
8
9namespace HeuristicLab.Services.Hive {
10  public class HiveStatisticsGenerator : IStatisticsGenerator {
/// <summary>
/// User manager used to resolve user names; fetched from the service locator on every access.
/// </summary>
private IUserManager userManager {
  get { return ServiceLocator.Instance.UserManager; }
}

/// <summary>
/// Width of one time slot (five minutes). A new DimTime row is only created once more than
/// this span has passed since the latest one, and slot start times are rounded down to it.
/// </summary>
private static readonly TimeSpan SmallestTimeSpan = TimeSpan.FromMinutes(5);
14
/// <summary>
/// Refreshes the dimension tables and, if a new time slot was created, writes the fact rows
/// for that slot. All work happens inside a single ReadCommitted transaction scope.
/// </summary>
public void GenerateStatistics() {
  var transactionOptions = new TransactionOptions { IsolationLevel = IsolationLevel.ReadCommitted };

  using (var db = new HiveDataContext(Settings.Default.HeuristicLab_Hive_LinqConnectionString))
  using (var transaction = new TransactionScope(TransactionScopeOption.Required, transactionOptions)) {
    // Dimensions are submitted first so that the fact rows can reference them.
    DimTime openedSlot = UpdateDimensionTables(db);
    db.SubmitChanges();

    // A null slot means the current time slot already exists; no facts are written then.
    if (openedSlot != null) {
      UpdateFactTables(openedSlot, db);
      try {
        db.SubmitChanges();
      }
      catch (DuplicateKeyException) {
        // Deliberately ignored.
        // Probable change from summertime to wintertime, resulting in overlapping times.
        // On a wintertime to summertime change, slave timeouts and a fact gap will occur.
      }
    }

    transaction.Complete();
  }
}
36
/// <summary>
/// Updates the time, user, job and client dimensions, in that order.
/// </summary>
/// <param name="db">Data context that collects the pending changes.</param>
/// <returns>The newly created time slot, or null when no new slot was created.</returns>
private DimTime UpdateDimensionTables(HiveDataContext db) {
  DimTime createdSlot = UpdateTime(db);

  UpdateUsers(db);
  UpdateJobs(db);
  UpdateClients(db);

  return createdSlot;
}
46
/// <summary>
/// Creates a new DimTime row when the time dimension is empty or when more than
/// <c>SmallestTimeSpan</c> has passed since the most recent row.
/// </summary>
/// <param name="db">Data context that receives the pending insert.</param>
/// <returns>The row queued for insertion, or null when the latest slot is still current.</returns>
private DimTime UpdateTime(HiveDataContext db) {
  var latestSlotTime = db.DimTimes
    .OrderByDescending(t => t.Time)
    .Select(t => t.Time)
    .FirstOrDefault();

  var now = DateTime.Now;

  // default(DateTime) is what FirstOrDefault yields for an empty time dimension.
  bool slotIsDue = latestSlotTime == default(DateTime) || latestSlotTime + SmallestTimeSpan < now;
  if (!slotIsDue) {
    return null;
  }

  // Round the minute down to the start of the enclosing slot.
  int slotMinute = now.Minute - now.Minute % SmallestTimeSpan.Minutes;

  var slot = new DimTime {
    Time = new DateTime(now.Year, now.Month, now.Day, now.Hour, slotMinute, 0),
    Hour = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0),
    Day = new DateTime(now.Year, now.Month, now.Day),
    Month = new DateTime(now.Year, now.Month, 1),
    Year = new DateTime(now.Year, 1, 1)
  };
  db.DimTimes.InsertOnSubmit(slot);

  return slot;
}
70
/// <summary>
/// Queues a DimUser row for every resource owner or job owner that is missing from the
/// user dimension, and ensures that the placeholder "NULL" user (Guid.Empty) exists.
/// </summary>
/// <param name="db">Data context that collects the pending inserts.</param>
private void UpdateUsers(HiveDataContext db) {
  var resourceOwnerIds = db.Resources
    .Where(x => x.OwnerUserId != null)
    .Select(x => x.OwnerUserId.Value);
  var jobOwnerIds = db.Jobs.Select(x => x.OwnerUserId);

  var unknownOwnerIds = resourceOwnerIds
    .Union(jobOwnerIds)
    .Where(u => !db.DimUsers.Select(x => x.UserId).Contains(u));

  // The ids are materialized before projecting, so the user lookups below run in memory.
  // NOTE(review): assumes GetUserById never returns null for these ids - confirm.
  var newDimUsers = unknownOwnerIds
    .ToList()
    .Select(ownerId => new DimUser {
      UserId = ownerId,
      Name = userManager.GetUserById(ownerId).UserName
    });

  db.DimUsers.InsertAllOnSubmit(newDimUsers);

  // insert NULL-User
  bool nullUserExists = db.DimUsers.Any(x => x.UserId == Guid.Empty);
  if (!nullUserExists) {
    db.DimUsers.InsertOnSubmit(new DimUser { UserId = Guid.Empty, Name = "NULL" });
  }
}
91
/// <summary>
/// Queues a DimJob row for every job that is missing from the job dimension, and ensures
/// that the placeholder "NULL" job (Guid.Empty) exists.
/// </summary>
/// <param name="db">Data context that collects the pending inserts.</param>
private void UpdateJobs(HiveDataContext db) {
  var unknownJobs = db.Jobs
    .Where(j => !db.DimJobs.Select(x => x.JobId).Contains(j.JobId));

  // The jobs are materialized before projecting, so the user lookups below run in memory.
  // NOTE(review): assumes GetUserById never returns null for a job owner - confirm.
  var newDimJobs = unknownJobs
    .ToList()
    .Select(job => new DimJob {
      JobId = job.JobId,
      JobName = job.Name,
      UserId = job.OwnerUserId,
      UserName = userManager.GetUserById(job.OwnerUserId).UserName
    });

  db.DimJobs.InsertAllOnSubmit(newDimJobs);

  // insert NULL-Job
  bool nullJobExists = db.DimJobs.Any(x => x.JobId == Guid.Empty);
  if (!nullJobExists) {
    db.DimJobs.InsertOnSubmit(new DimJob { JobId = Guid.Empty, JobName = "NULL", UserId = Guid.Empty, UserName = "NULL" });
  }
}
114
/// <summary>
/// Synchronizes the client dimension with the current slaves. Active DimClient rows whose
/// slave no longer exists, or whose name or resource groups changed, are expired; new rows
/// are queued for slaves that have no DimClient yet and for the changed ones.
/// </summary>
/// <param name="db">Data context that collects the pending updates and inserts.</param>
private void UpdateClients(HiveDataContext db) {
  // Active dimension rows whose slave has been removed.
  var removedClients = db.DimClients
    .Where(c => c.ExpirationTime == null &&
                !db.Resources.OfType<Slave>().Select(x => x.ResourceId).Contains(c.ResourceId));

  // Active dimension rows that no longer match their slave's name or group hierarchy.
  var modifiedClients =
    from slave in db.Resources.OfType<Slave>()
    join dimClient in db.DimClients on slave.ResourceId equals dimClient.ResourceId
    where dimClient.ExpirationTime == null
          && (slave.Name != dimClient.Name || slave.ParentResourceId != dimClient.ResourceGroupId ||
              slave.ParentResource.ParentResourceId != dimClient.ResourceGroup2Id)
    select new { Slave = slave, Client = dimClient };

  var clientsToExpire = removedClients.Union(modifiedClients.Select(x => x.Client));
  foreach (var expiring in clientsToExpire) {
    expiring.ExpirationTime = DateTime.Now;
  }

  // Slaves that need a fresh dimension row: never seen before, or just expired as modified.
  var newClients = db.Resources.OfType<Slave>()
    .Where(s => !db.DimClients.Select(x => x.ResourceId).Contains(s.ResourceId)
                || modifiedClients.Select(x => x.Slave.ResourceId).Contains(s.ResourceId))
    .Select(s => new {
      Slave = s,
      Group = s.ParentResourceId,
      Group2 = s.ParentResource.ParentResourceId
    });

  var newDimClients = newClients
    .ToList()
    .Select(entry => new DimClient {
      ResourceId = entry.Slave.ResourceId,
      Name = entry.Slave.Name,
      ExpirationTime = null,
      ResourceGroupId = entry.Group,
      ResourceGroup2Id = entry.Group2
    });

  db.DimClients.InsertAllOnSubmit(newDimClients);
}
156
/// <summary>
/// Queues the fact rows for the given time slot: client info facts first, then task facts.
/// </summary>
/// <param name="newTime">Time slot the new facts belong to.</param>
/// <param name="db">Data context that collects the pending changes.</param>
private void UpdateFactTables(DimTime newTime, HiveDataContext db) {
  UpdateClientInfoFacts(newTime, db);

  UpdateTaskFacts(newTime, db);
}
161
/// <summary>
/// Queues one FactClientInfo row per slave that has an active DimClient. The row holds the
/// slave's current core, memory and CPU figures, and books the time since the slave's
/// previous fact (or one <c>SmallestTimeSpan</c> if there is none) onto its current state.
/// </summary>
/// <param name="newTime">Time slot the new facts are attached to.</param>
/// <param name="db">Data context that collects the pending inserts.</param>
private void UpdateClientInfoFacts(DimTime newTime, HiveDataContext db) {
  var time = newTime.Time;

  // Most recent fact per resource, across all DimClient rows of that resource.
  var lastFacts =
    from fact in db.FactClientInfos
    join dimClient in db.DimClients on fact.ClientId equals dimClient.Id
    group fact by dimClient.ResourceId into factsOfResource
    select new {
      ResourceId = factsOfResource.Key,
      Fact = factsOfResource.OrderByDescending(x => x.Time).First(),
    };

  // Slaves with an active dimension row, left-joined with their latest fact.
  var slaves =
    from slave in db.Resources.OfType<Slave>()
    join dimClient in db.DimClients on slave.ResourceId equals dimClient.ResourceId
    join latest in lastFacts on dimClient.ResourceId equals latest.ResourceId into latestOfClient
    from latestOrNone in latestOfClient.DefaultIfEmpty()
    where dimClient.ExpirationTime == null
    select new {
      Slave = slave,
      Client = dimClient,
      LastFact = latestOrNone != null ? latestOrNone.Fact : null
    };

  var clientFacts = slaves.ToList().Select(row => {
    var slave = row.Slave;

    // Minutes to account for in this fact.
    var duration = row.LastFact != null
      ? (time - row.LastFact.Time).TotalMinutes
      : SmallestTimeSpan.TotalMinutes;

    bool isIdle = slave.SlaveState == SlaveState.Idle;
    bool coresKnown = slave.Cores != null && slave.FreeCores != null;
    bool memoryKnown = slave.Memory != null && slave.FreeMemory != null;

    return new FactClientInfo {
      DimClient = row.Client,
      DimTime = newTime,
      UserId = slave.OwnerUserId ?? Guid.Empty,
      NumUsedCores = coresKnown ? slave.Cores.Value - slave.FreeCores.Value : 0,
      NumTotalCores = slave.Cores ?? 0,
      UsedMemory = memoryKnown ? slave.Memory.Value - slave.FreeMemory.Value : 0,
      TotalMemory = slave.Memory ?? 0,
      CpuUtilization = slave.CpuUtilization,
      TrafficIn = 0,
      TrafficOut = 0,
      // An idle slave counts as idle only while it is allowed to calculate, otherwise as unavailable.
      TotalTimeIdle = isIdle && slave.IsAllowedToCalculate ? duration : 0.0,
      TotalTimeCalculating = slave.SlaveState == SlaveState.Calculating ? duration : 0.0,
      TotalTimeTransferring = 0.0,
      TotalTimeUnavailable = isIdle && !slave.IsAllowedToCalculate ? duration : 0.0,
      TotalTimeOffline = slave.SlaveState == SlaveState.Offline ? duration : 0.0
    };
  });

  db.FactClientInfos.InsertAllOnSubmit(clientFacts);
}
215
/// <summary>
/// Rebuilds the task facts that are still open. For every non-parent task that has no fact
/// yet, or whose fact has no end time, a new FactTask is computed from the task and its
/// state logs; the open facts are then queued for deletion and the new ones for insertion.
/// </summary>
/// <param name="newTime">Current time slot; used as start for new facts and as end for tasks
/// in state Finished, Failed or Aborted.</param>
/// <param name="db">Data context that collects the pending deletes and inserts.</param>
private void UpdateTaskFacts(DimTime newTime, HiveDataContext db) {
  // Facts without an end time are replaced on every run.
  var oldFacts = db.FactTasks.Where(fact => fact.EndTime == null);

  // Per task: its open fact (if any), its state logs, and the active client of the last slave.
  var newFacts =
    from task in db.Tasks
    where !task.IsParentTask
      && (!db.FactTasks.Select(f => f.TaskId).Contains(task.TaskId) || oldFacts.Select(f => f.TaskId).Contains(task.TaskId))
    join openFact in oldFacts on task.TaskId equals openFact.TaskId into openFactsOfTask
    from openFact in openFactsOfTask.DefaultIfEmpty()
    let lastSlaveId = task.StateLogs.OrderByDescending(sl => sl.DateTime).First(sl => sl.SlaveId != null).SlaveId
    join activeClient in db.DimClients.Where(c => c.ExpirationTime == null) on lastSlaveId equals activeClient.ResourceId into activeClientsOfSlave
    from activeClient in activeClientsOfSlave.DefaultIfEmpty()
    select new {
      Task = task,
      TaskDataSize = task.JobData.DataSize,
      StateLogs = task.StateLogs.OrderBy(sl => sl.DateTime),
      LastClientId = activeClient != null ? activeClient.Id : default(Guid?),
      LastFact = openFact
    };

  // States in which a task fact gets its end time.
  var terminalStates = new[] { TaskState.Finished, TaskState.Failed, TaskState.Aborted };

  // The query runs here; everything below is computed in memory.
  var newTaskFacts = newFacts.ToList().Select(entry => {
    var task = entry.Task;
    var previousFact = entry.LastFact;
    var stateLogs = new LinkedList<StateLog>(entry.StateLogs);

    return new FactTask {
      TaskId = task.TaskId,
      JobId = task.JobId,
      DimTimeStart = previousFact != null ? previousFact.DimTimeStart : newTime,
      DimTimeEnd = terminalStates.Contains(task.State) ? newTime : null,
      // Prefer the currently active client; otherwise carry over the previous fact's client.
      LastClientId = entry.LastClientId ?? (previousFact != null ? previousFact.LastClientId : Guid.Empty),
      Priority = task.Priority,
      CoresRequired = task.CoresNeeded,
      MemoryRequired = task.MemoryNeeded,
      TaskSize = previousFact == null ? entry.TaskDataSize : previousFact.TaskSize,
      ResultSize = task.State == TaskState.Finished ? entry.TaskDataSize : default(long?),
      NumCalculationRuns = stateLogs.CountCalculationRuns(),
      NumRetries = stateLogs.CountRetries(),
      TotalWaitingTime = stateLogs.SumTotalTimeWhere(node => node.Value.State == TaskState.Waiting),
      // Only Calculating phases that are directly followed by Transferring count as runtime.
      TotalRuntime = stateLogs.SumTotalTimeWhere(node => node.Value.State == TaskState.Calculating && node.NextIs(next => next.State == TaskState.Transferring)),
      TotalTransferTime = stateLogs.SumTotalTimeWhere(node => node.Value.State == TaskState.Transferring),
      TaskState = task.State
    };
  });

  db.FactTasks.DeleteAllOnSubmit(oldFacts.ToList());
  db.FactTasks.InsertAllOnSubmit(newTaskFacts);
}
267  }
268
/// <summary>
/// Aggregations over a task's state logs held in a linked list, in list order.
/// </summary>
public static class StateLogLinkedListExtensions {
  /// <summary>
  /// Counts the Calculating entries that are directly followed by a Transferring entry.
  /// </summary>
  public static int CountCalculationRuns(this LinkedList<StateLog> stateLogs) {
    int runs = 0;
    foreach (var node in stateLogs.EnumerateNodes()) {
      bool isCalculating = node.Value.State == TaskState.Calculating;
      if (isCalculating && node.NextIs(next => next.State == TaskState.Transferring)) {
        runs++;
      }
    }
    return runs;
  }

  /// <summary>
  /// Counts the Calculating entries that have a successor which is not a Transferring entry.
  /// </summary>
  public static int CountRetries(this LinkedList<StateLog> stateLogs) {
    int retries = 0;
    foreach (var node in stateLogs.EnumerateNodes()) {
      bool isCalculating = node.Value.State == TaskState.Calculating;
      // NextIs is false for the last node, so a trailing Calculating entry is not a retry.
      if (isCalculating && node.NextIs(next => next.State != TaskState.Transferring)) {
        retries++;
      }
    }
    return retries;
  }

  /// <summary>
  /// Sums, in minutes, the time from each matching entry to its successor.
  /// A matching entry without a successor contributes zero.
  /// </summary>
  /// <param name="stateLogs">State logs to scan.</param>
  /// <param name="predicate">Selects the nodes whose duration is added.</param>
  public static double SumTotalTimeWhere(this LinkedList<StateLog> stateLogs, Predicate<LinkedListNode<StateLog>> predicate) {
    double totalMinutes = 0.0;
    foreach (var node in stateLogs.EnumerateNodes()) {
      if (!predicate(node)) {
        continue;
      }
      if (node.Next != null) {
        totalMinutes += (node.Next.Value.DateTime - node.Value.DateTime).TotalMinutes;
      }
    }
    return totalMinutes;
  }
}
286
/// <summary>
/// Node-level helpers for <see cref="LinkedList{T}"/>.
/// </summary>
public static class LinkedListExtensions {
  /// <summary>
  /// Enumerates the nodes (not the values) of the list from first to last.
  /// </summary>
  /// <param name="list">List to walk.</param>
  /// <returns>A lazily evaluated sequence of the list's nodes.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
  public static IEnumerable<LinkedListNode<T>> EnumerateNodes<T>(this LinkedList<T> list) {
    // Validate here rather than in the iterator, so a null list fails at the call site
    // and not later when the sequence is first enumerated.
    if (list == null) {
      throw new ArgumentNullException("list");
    }
    return EnumerateNodesIterator(list);
  }

  // Lazy walk over the nodes; the argument has already been validated by the caller.
  private static IEnumerable<LinkedListNode<T>> EnumerateNodesIterator<T>(LinkedList<T> list) {
    for (var node = list.First; node != null; node = node.Next) {
      yield return node;
    }
  }

  /// <summary>
  /// Tests whether the node has a successor whose value satisfies the predicate.
  /// </summary>
  /// <param name="node">Node whose successor is inspected.</param>
  /// <param name="predicate">Condition applied to the successor's value.</param>
  /// <returns>False when the node is the last one; otherwise the predicate's result.</returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="node"/> or <paramref name="predicate"/> is null.
  /// </exception>
  public static bool NextIs<T>(this LinkedListNode<T> node, Predicate<T> predicate) {
    if (node == null) {
      throw new ArgumentNullException("node");
    }
    if (predicate == null) {
      throw new ArgumentNullException("predicate");
    }
    return node.Next != null && predicate(node.Next.Value);
  }
}
300}
Note: See TracBrowser for help on using the repository browser.