Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HiveStatistics/sources/HeuristicLab.Services.Hive/3.3/HiveStatisticsGenerator.cs @ 9578

Last change on this file since 9578 was 9578, checked in by pfleck, 11 years ago

#2063:
Task statistics contain only the current state and no change history.
Statistics calculation is no longer based on incremental updates.
TaskId is primary key in task statistics and EndTime is nullable.
Added TaskState to Task statistics.

File size: 12.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Transactions;
5using HeuristicLab.Services.Access;
6using HeuristicLab.Services.Hive.DataAccess;
7
8namespace HeuristicLab.Services.Hive {
9  public class HiveStatisticsGenerator : IStatisticsGenerator {
10
/// <summary>
/// User manager used to resolve user names; read from
/// <c>ServiceLocator.Instance</c> on every access.
/// </summary>
private IUserManager userManager {
  get {
    return ServiceLocator.Instance.UserManager;
  }
}
12
/// <summary>
/// Five-minute interval of the time dimension: a new time entry is only created
/// once more than this span has passed since the latest one, and entry times are
/// rounded down to it.
/// </summary>
private static readonly TimeSpan SmallestTimeSpan = TimeSpan.FromMinutes(5);
14
/// <summary>
/// Runs one statistics update: refreshes the dimension tables and, when that
/// produced a new time entry, the fact tables as well. Everything is done on one
/// data context inside a single read-committed transaction scope.
/// </summary>
public void GenerateStatistics() {
  var transactionOptions = new TransactionOptions { IsolationLevel = IsolationLevel.ReadCommitted };

  using (var db = new HiveDataContext(Settings.Default.HeuristicLab_Hive_LinqConnectionString))
  using (var scope = new TransactionScope(TransactionScopeOption.Required, transactionOptions)) {
    DimTime createdTime = UpdateDimensionTables(db);
    db.SubmitChanges();

    // The fact rows reference the new time entry, so they are only written when one exists.
    bool hasNewTimeEntry = createdTime != null;
    if (hasNewTimeEntry) {
      UpdateFactTables(createdTime, db);
      db.SubmitChanges();
    }

    scope.Complete();
  }
}
30
/// <summary>
/// Updates the time, user, job and client dimension tables, in that order.
/// Changes are only queued on <paramref name="db"/>; the caller submits them.
/// </summary>
/// <param name="db">Data context the changes are queued on.</param>
/// <returns>The time entry returned by <c>UpdateTime</c>; null when no new entry was created.</returns>
private DimTime UpdateDimensionTables(HiveDataContext db) {
  DimTime createdTime = UpdateTime(db);

  // Update other tables out of sync with time dimension?
  UpdateUsers(db);
  UpdateJobs(db);
  UpdateClients(db);

  return createdTime;
}
40
/// <summary>
/// Queues a new time dimension entry when none exists yet or when more than
/// <c>SmallestTimeSpan</c> has passed since the most recent one.
/// </summary>
/// <param name="db">Data context that is queried for the latest entry and receives the pending insert.</param>
/// <returns>The newly queued entry, or null when the latest entry is still recent enough.</returns>
private DimTime UpdateTime(HiveDataContext db) {
  // FirstOrDefault yields default(DateTime) when the table is empty.
  var lastUpdateTime =
    (from t in db.DimTimes
     orderby t.Time descending
     select t.Time)
    .FirstOrDefault();

  var now = DateTime.Now;
  bool isFirstEntry = lastUpdateTime == default(DateTime);
  if (!isFirstEntry && lastUpdateTime + SmallestTimeSpan >= now) {
    return null;
  }

  // Round down on ticks instead of on TimeSpan.Minutes: Minutes is only the minute
  // component of the span, so an interval of whole hours would divide by zero and an
  // interval above one hour would ignore its hour part. For the current five-minute
  // interval both approaches give the same value.
  var roundedTime = new DateTime(now.Ticks - now.Ticks % SmallestTimeSpan.Ticks);

  var newTime = new DimTime {
    Time = roundedTime,
    Hour = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0),
    Day = new DateTime(now.Year, now.Month, now.Day, 0, 0, 0),
    Month = new DateTime(now.Year, now.Month, 1, 0, 0, 0),
    Year = new DateTime(now.Year, 1, 1, 0, 0, 0)
  };
  db.DimTimes.InsertOnSubmit(newTime);

  return newTime;
}
64
/// <summary>
/// Queues a user dimension entry for every owner of a resource or job that is not
/// yet present in the user dimension, plus the placeholder "NULL" user.
/// </summary>
/// <param name="db">Data context that is queried and receives the pending inserts.</param>
private void UpdateUsers(HiveDataContext db) {
  // Guid.Empty is reserved for the NULL-User added below. Excluding it here keeps an
  // owner id of Guid.Empty from being queued twice in the same submit and from being
  // looked up in the user manager.
  var newUsers =
    from u in db.Resources.Where(x => x.OwnerUserId != null).Select(x => x.OwnerUserId.Value).Union(db.Jobs.Select(x => x.OwnerUserId))
    where u != Guid.Empty && !db.DimUsers.Select(x => x.UserId).Contains(u)
    select u;

  // The ids are materialized first because the name lookup runs in memory, not in the database.
  var newDimUsers =
    from u in newUsers.ToList()
    select new DimUser {
      UserId = u,
      Name = userManager.GetUserById(u).UserName
    };

  db.DimUsers.InsertAllOnSubmit(newDimUsers);

  // insert NULL-User
  if (!db.DimUsers.Any(x => x.UserId == Guid.Empty)) {
    db.DimUsers.InsertOnSubmit(new DimUser { UserId = Guid.Empty, Name = "NULL" });
  }
}
85
/// <summary>
/// Queues a job dimension entry for every job that is not yet present in the job
/// dimension, plus the placeholder "NULL" job.
/// </summary>
/// <param name="db">Data context that is queried and receives the pending inserts.</param>
private void UpdateJobs(HiveDataContext db) {
  // Jobs are loaded into memory first; the owner name lookup cannot run in the database.
  var untrackedJobs = db.Jobs
    .Where(job => !db.DimJobs.Select(x => x.JobId).Contains(job.JobId))
    .ToList();

  var dimJobsToInsert = untrackedJobs.Select(job => new DimJob {
    JobId = job.JobId,
    JobName = job.Name,
    UserId = job.OwnerUserId,
    UserName = userManager.GetUserById(job.OwnerUserId).UserName
  });

  db.DimJobs.InsertAllOnSubmit(dimJobsToInsert);

  // insert NULL-Job
  bool nullJobExists = db.DimJobs.Any(x => x.JobId == Guid.Empty);
  if (!nullJobExists) {
    db.DimJobs.InsertOnSubmit(new DimJob { JobId = Guid.Empty, JobName = "NULL", UserId = Guid.Empty, UserName = "NULL" });
  }
}
108
/// <summary>
/// Synchronizes the client dimension with the slave resources: entries whose slave
/// no longer exists or whose name or group assignment changed get an expiration
/// time, and a fresh entry is queued for every new or changed slave.
/// </summary>
/// <param name="db">Data context that is queried and receives the pending changes.</param>
private void UpdateClients(HiveDataContext db) {
  // Non-expired entries without a matching slave resource.
  var removedClients = db.DimClients
    .Where(c => c.ExpirationTime == null
                && !db.Resources.OfType<Slave>().Select(x => x.ResourceId).Contains(c.ResourceId));

  // Non-expired entries whose slave differs in name, group or parent group.
  var modifiedClients = db.Resources.OfType<Slave>()
    .Join(db.DimClients,
          s => s.ResourceId,
          c => c.ResourceId,
          (s, c) => new { Slave = s, Client = c })
    .Where(x => x.Client.ExpirationTime == null
                && (x.Slave.Name != x.Client.Name
                    || x.Slave.ParentResourceId != x.Client.ResourceGroupId
                    || x.Slave.ParentResource.ParentResourceId != x.Client.ResourceGroup2Id));

  var clientsToExpire = removedClients.Union(modifiedClients.Select(x => x.Client));
  foreach (var expired in clientsToExpire) {
    expired.ExpirationTime = DateTime.Now;
  }

  // Slaves without any entry so far, plus the changed ones, which get a replacement entry.
  var newClients = db.Resources.OfType<Slave>()
    .Where(s => !db.DimClients.Select(x => x.ResourceId).Contains(s.ResourceId)
                || modifiedClients.Select(x => x.Slave.ResourceId).Contains(s.ResourceId))
    .Select(s => new {
      Slave = s,
      Group = s.ParentResourceId,
      Group2 = s.ParentResource.ParentResourceId
    });

  var newDimClients = newClients.ToList().Select(n => new DimClient {
    ResourceId = n.Slave.ResourceId,
    Name = n.Slave.Name,
    ExpirationTime = null,
    ResourceGroupId = n.Group,
    ResourceGroup2Id = n.Group2
  });

  db.DimClients.InsertAllOnSubmit(newDimClients);
}
150
/// <summary>
/// Queues the fact rows for the given time entry: client info facts first, then task facts.
/// </summary>
/// <param name="newTime">Time entry passed on to both fact updates.</param>
/// <param name="db">Data context the changes are queued on.</param>
private void UpdateFactTables(DimTime newTime, HiveDataContext db) {
  UpdateClientInfoFacts(newTime, db);
  UpdateTaskFacts(newTime, db);
}
155
/// <summary>
/// Queues one client info fact per slave that has a non-expired client dimension
/// entry. Core and memory usage are taken from the slave's current values; the
/// accumulated state times continue from the slave's most recent fact.
/// </summary>
/// <param name="newTime">Time entry the new facts are attached to.</param>
/// <param name="db">Data context that is queried and receives the pending inserts.</param>
private void UpdateClientInfoFacts(DimTime newTime, HiveDataContext db) {
  // Most recent fact per resource id, across all client entries of that resource.
  var latestFactPerResource =
    from fact in db.FactClientInfos
    join dimClient in db.DimClients on fact.ClientId equals dimClient.Id
    group fact by dimClient.ResourceId into factsOfResource
    select new {
      ResourceId = factsOfResource.Key,
      Fact = factsOfResource.OrderByDescending(x => x.Time).First(),
    };

  // Left join: slaves without any earlier fact get LastFact == null.
  var slaves =
    from s in db.Resources.OfType<Slave>()
    join c in db.DimClients on s.ResourceId equals c.ResourceId
    join latest in latestFactPerResource on c.ResourceId equals latest.ResourceId into latestOrNone
    from cf in latestOrNone.DefaultIfEmpty()
    where c.ExpirationTime == null
    select new {
      Slave = s,
      Client = c,
      LastFact = cf != null ? cf.Fact : null
    };

  var clientFacts = slaves.ToList().Select(entry => {
    var slave = entry.Slave;
    var previousFact = entry.LastFact;

    return new FactClientInfo {
      DimClient = entry.Client,
      DimTime = newTime,
      UserId = slave.OwnerUserId ?? Guid.Empty,
      NumUsedCores = slave.Cores.HasValue && slave.FreeCores.HasValue
        ? slave.Cores.Value - slave.FreeCores.Value
        : 0,
      NumTotalCores = slave.Cores ?? 0,
      UsedMemory = slave.Memory.HasValue && slave.FreeMemory.HasValue
        ? slave.Memory.Value - slave.FreeMemory.Value
        : 0,
      TotalMemory = slave.Memory ?? 0,
      CpuUtilization = slave.CpuUtilization,
      TrafficIn = 0,
      TrafficOut = 0,
      // An idle slave counts as idle only while it is allowed to calculate,
      // otherwise the time is booked as unavailable.
      TotalTimeIdle = CalcNewTotalTime(
        previousFact, newTime.Time,
        x => x.TotalTimeIdle,
        () => slave.SlaveState == SlaveState.Idle && slave.IsAllowedToCalculate),
      TotalTimeCalculating = CalcNewTotalTime(
        previousFact, newTime.Time,
        x => x.TotalTimeCalculating,
        () => slave.SlaveState == SlaveState.Calculating),
      TotalTimeTransferring = 0.0,
      TotalTimeUnavailable = CalcNewTotalTime(
        previousFact, newTime.Time,
        x => x.TotalTimeUnavailable,
        () => slave.SlaveState == SlaveState.Idle && !slave.IsAllowedToCalculate),
      TotalTimeOffline = CalcNewTotalTime(
        previousFact, newTime.Time,
        x => x.TotalTimeOffline,
        () => slave.SlaveState == SlaveState.Offline)
    };
  });

  db.FactClientInfos.InsertAllOnSubmit(clientFacts);
}
214
/// <summary>
/// Computes an accumulated state time for a new fact from the previous fact.
/// </summary>
/// <param name="lastFact">Previous fact, or null when there is none.</param>
/// <param name="newTime">Time of the new fact.</param>
/// <param name="selector">Reads the accumulated total in question from the previous fact.</param>
/// <param name="condition">Decides whether the minutes since the previous fact are added to that total.</param>
/// <returns>
/// 0.0 without a previous fact; otherwise the previous total, increased by the minutes
/// between <c>lastFact.Time</c> and <paramref name="newTime"/> when the condition holds.
/// </returns>
private double CalcNewTotalTime(FactClientInfo lastFact, DateTime newTime, Func<FactClientInfo, double> selector, Func<bool> condition) {
  if (lastFact == null) {
    return 0.0;
  }

  bool addElapsed = condition();
  double total = selector(lastFact);
  if (addElapsed) {
    total += (newTime - lastFact.Time).TotalMinutes;
  }
  return total;
}
223
/// <summary>
/// Rebuilds the task facts of all non-parent tasks that either have no fact yet or
/// whose fact has no end time. The existing open facts are queued for deletion and
/// replaced by facts recomputed from the tasks' state logs.
/// </summary>
/// <param name="newTime">Time entry used as start time for tasks without an earlier fact and as end time for finished, failed or aborted tasks.</param>
/// <param name="db">Data context that is queried and receives the pending deletes and inserts.</param>
private void UpdateTaskFacts(DimTime newTime, HiveDataContext db) {
  // facts that are still open (no end time)
  var openFacts = db.FactTasks.Where(fact => fact.EndTime == null);

  // query Task facts data: each task with its open fact (if any) and the non-expired
  // client entry of the slave from its latest state log that has a slave id (if any)
  var taskRows =
    from task in db.Tasks
    where !task.IsParentTask
      && (!db.FactTasks.Select(fact => fact.TaskId).Contains(task.TaskId) || openFacts.Select(fact => fact.TaskId).Contains(task.TaskId))
    join lastFact in openFacts on task.TaskId equals lastFact.TaskId into lastFactPerTask
    from lastFact in lastFactPerTask.DefaultIfEmpty()
    let lastSlaveId = task.StateLogs.OrderByDescending(sl => sl.DateTime).First(sl => sl.SlaveId != null).SlaveId
    join client in db.DimClients.Where(c => c.ExpirationTime == null) on lastSlaveId equals client.ResourceId into clientsPerSlaveId
    from client in clientsPerSlaveId.DefaultIfEmpty()
    select new {
      Task = task,
      TaskDataSize = task.JobData.DataSize,
      StateLogs = task.StateLogs.OrderBy(sl => sl.DateTime),
      LastClientId = client != null ? client.Id : default(Guid?),
      LastFact = lastFact
    };

  // states in which a task gets an end time
  var terminalStates = new[] { TaskState.Finished, TaskState.Failed, TaskState.Aborted };

  // new Task facts, computed in memory from the chronologically ordered state logs
  var newTaskFacts = taskRows.ToList().Select(row => {
    var logs = new LinkedList<StateLog>(row.StateLogs);

    return new FactTask {
      TaskId = row.Task.TaskId,
      JobId = row.Task.JobId,
      DimTimeStart = row.LastFact != null ? row.LastFact.DimTimeStart : newTime,
      DimTimeEnd = terminalStates.Contains(row.Task.State) ? newTime : null,
      LastClientId = row.LastClientId ?? (row.LastFact != null ? row.LastFact.LastClientId : Guid.Empty),
      Priority = row.Task.Priority,
      CoresRequired = row.Task.CoresNeeded,
      MemoryRequired = row.Task.MemoryNeeded,
      TaskSize = row.LastFact == null ? row.TaskDataSize : row.LastFact.TaskSize,
      ResultSize = row.Task.State == TaskState.Finished ? row.TaskDataSize : default(long?),
      // a calculation run is a Calculating log directly followed by Transferring
      NumCalculationRuns = logs.EnumerateNodes()
        .Count(node => node.Value.State == TaskState.Calculating && node.NextIs(next => next.State == TaskState.Transferring)),
      // a retry is a Calculating log directly followed by anything else (NextIs is false for the last log)
      NumRetries = logs.EnumerateNodes()
        .Count(node => node.Value.State == TaskState.Calculating && node.NextIs(next => next.State != TaskState.Transferring)),
      TotalWaitingTime = SumTotalTime(logs, node => node.Value.State == TaskState.Waiting),
      TotalRuntime = SumTotalTime(logs, node => node.Value.State == TaskState.Calculating && node.NextIs(next => next.State == TaskState.Transferring)),
      TotalTransferTime = SumTotalTime(logs, node => node.Value.State == TaskState.Transferring),
      TaskState = row.Task.State
    };
  });

  // NOTE(review): open facts are deleted and re-inserted under the same TaskId within one
  // submit. If HiveDataContext is a LINQ to SQL DataContext, confirm that this works with
  // its insert-before-delete submit order and identity tracking.
  db.FactTasks.DeleteAllOnSubmit(openFacts.ToList());
  db.FactTasks.InsertAllOnSubmit(newTaskFacts);
}
277
/// <summary>
/// Adds up, in minutes, the time spans between every state log node accepted by
/// <paramref name="predicate"/> and its successor.
/// </summary>
/// <param name="stateLogs">State logs in the order in which the spans are measured.</param>
/// <param name="predicate">Selects the nodes whose span up to the next node is counted.</param>
/// <returns>Total minutes; the last node has no successor and contributes nothing.</returns>
private double SumTotalTime(LinkedList<StateLog> stateLogs, Predicate<LinkedListNode<StateLog>> predicate) {
  double totalMinutes = 0;
  foreach (var node in stateLogs.EnumerateNodes()) {
    if (!predicate(node)) {
      continue;
    }
    if (node.Next == null) {
      continue;
    }
    totalMinutes += (node.Next.Value.DateTime - node.Value.DateTime).TotalMinutes;
  }
  return totalMinutes;
}
283  }
284
/// <summary>
/// Helpers for walking a <see cref="LinkedList{T}"/> node by node, so that callers
/// can look at a node's successor while enumerating.
/// </summary>
public static class LinkedListNodeExtensions {
  /// <summary>
  /// Lazily yields the nodes of <paramref name="list"/> from first to last.
  /// </summary>
  /// <param name="list">List whose nodes are enumerated.</param>
  /// <returns>The nodes in list order; empty for an empty list.</returns>
  public static IEnumerable<LinkedListNode<T>> EnumerateNodes<T>(this LinkedList<T> list) {
    for (var current = list.First; current != null; current = current.Next) {
      yield return current;
    }
  }

  /// <summary>
  /// Checks whether <paramref name="node"/> has a successor whose value satisfies
  /// <paramref name="predicate"/>.
  /// </summary>
  /// <param name="node">Node whose successor is inspected.</param>
  /// <param name="predicate">Condition applied to the successor's value.</param>
  /// <returns>False when there is no successor; otherwise the predicate's result.</returns>
  public static bool NextIs<T>(this LinkedListNode<T> node, Predicate<T> predicate) {
    var successor = node.Next;
    if (successor == null) {
      return false;
    }
    return predicate(successor.Value);
  }
}
298}
Note: See TracBrowser for help on using the repository browser.