using System;
using System.Collections.Generic;
using System.Data.Linq;
using System.Linq;
using System.Transactions;
using HeuristicLab.Services.Access;
using HeuristicLab.Services.Hive.DataAccess;

namespace HeuristicLab.Services.Hive {
  /// <summary>
  /// Fills the statistics dimension tables (time, users, jobs, clients) and the
  /// fact tables (client info, tasks) from the current contents of the Hive database.
  /// </summary>
  public class HiveStatisticsGenerator : IStatisticsGenerator {
    private IUserManager userManager {
      get { return ServiceLocator.Instance.UserManager; }
    }

    /// <summary>Granularity of the time dimension: one <see cref="DimTime"/> row per five minutes.</summary>
    private static readonly TimeSpan SmallestTimeSpan = new TimeSpan(0, 5, 0);

    /// <summary>
    /// Updates the dimension tables and, if a new time slot was started, the fact tables.
    /// All work runs inside one read-committed <see cref="TransactionScope"/>.
    /// </summary>
    /// <remarks>
    /// A <see cref="DuplicateKeyException"/> raised while submitting the facts is swallowed
    /// on purpose and the transaction is still completed afterwards.
    /// </remarks>
    public void GenerateStatistics() {
      using (var db = new HiveDataContext(Settings.Default.HeuristicLab_Hive_LinqConnectionString))
      using (var transaction = new TransactionScope(TransactionScopeOption.Required, new TransactionOptions { IsolationLevel = IsolationLevel.ReadCommitted })) {
        var newTime = UpdateDimensionTables(db);
        // Dimension rows are submitted first; the facts created below reference them.
        db.SubmitChanges();

        // Facts are only generated when a new time slot was created.
        if (newTime != null) {
          UpdateFactTables(newTime, db);
          try {
            db.SubmitChanges();
          }
          catch (DuplicateKeyException) {
            // Probable change from summer time to winter time, resulting in overlapping times.
            // On a winter time to summer time change, slave timeouts and a fact gap will occur.
          }
        }

        transaction.Complete();
      }
    }

    /// <summary>Queues inserts/updates for all dimension tables.</summary>
    /// <param name="db">Data context the changes are queued on (not submitted here).</param>
    /// <returns>The newly created time slot, or <c>null</c> if none was created.</returns>
    private DimTime UpdateDimensionTables(HiveDataContext db) {
      var newTime = UpdateTime(db);
      UpdateUsers(db);
      UpdateJobs(db);
      UpdateClients(db);
      return newTime;
    }

    /// <summary>
    /// Queues a new <see cref="DimTime"/> row when there is none yet or when the latest
    /// one is more than <see cref="SmallestTimeSpan"/> older than the current local time.
    /// </summary>
    /// <param name="db">Data context to query and to queue the insert on.</param>
    /// <returns>The queued row, or <c>null</c> if the latest slot is still current.</returns>
    private DimTime UpdateTime(HiveDataContext db) {
      // FirstOrDefault yields default(DateTime) when the table is empty.
      var lastUpdateTime = (from t in db.DimTimes
                            orderby t.Time descending
                            select t.Time).FirstOrDefault();

      var now = DateTime.Now;
      DimTime newTime = null;

      if (lastUpdateTime == default(DateTime) || lastUpdateTime + SmallestTimeSpan < now) {
        newTime = new DimTime {
          // Round the minute down to a multiple of the slot length.
          Time = new DateTime(now.Year, now.Month, now.Day, now.Hour, now.Minute - now.Minute % SmallestTimeSpan.Minutes, 0),
          Hour = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0),
          Day = new DateTime(now.Year, now.Month, now.Day, 0, 0, 0),
          Month = new DateTime(now.Year, now.Month, 1, 0, 0, 0),
          Year = new DateTime(now.Year, 1, 1, 0, 0, 0)
        };
        db.DimTimes.InsertOnSubmit(newTime);
      }

      return newTime;
    }

    /// <summary>
    /// Queues <see cref="DimUser"/> rows for every resource owner or job owner that has no
    /// row yet, plus the placeholder user with <see cref="Guid.Empty"/> if it is missing.
    /// </summary>
    /// <param name="db">Data context to query and to queue the inserts on.</param>
    private void UpdateUsers(HiveDataContext db) {
      var newUsers = from u in db.Resources.Where(x => x.OwnerUserId != null).Select(x => x.OwnerUserId.Value).Union(db.Jobs.Select(x => x.OwnerUserId))
                     where !db.DimUsers.Select(x => x.UserId).Contains(u)
                     select u;

      // Materialize first: the user name lookup below runs in memory, not in SQL.
      // NOTE(review): assumes GetUserById never returns null for these ids - confirm.
      var newDimUsers = from u in newUsers.ToList()
                        select new DimUser {
                          UserId = u,
                          Name = userManager.GetUserById(u).UserName
                        };

      db.DimUsers.InsertAllOnSubmit(newDimUsers);

      // insert NULL-User
      if (!db.DimUsers.Any(x => x.UserId == Guid.Empty)) {
        db.DimUsers.InsertOnSubmit(new DimUser { UserId = Guid.Empty, Name = "NULL" });
      }
    }

    /// <summary>
    /// Queues <see cref="DimJob"/> rows for every job that has no row yet, plus the
    /// placeholder job with <see cref="Guid.Empty"/> if it is missing.
    /// </summary>
    /// <param name="db">Data context to query and to queue the inserts on.</param>
    private void UpdateJobs(HiveDataContext db) {
      var newJobs = from j in db.Jobs
                    where !db.DimJobs.Select(x => x.JobId).Contains(j.JobId)
                    select j;

      // Materialize first: the user name lookup below runs in memory, not in SQL.
      // NOTE(review): assumes GetUserById never returns null for job owners - confirm.
      var newDimJobs = from j in newJobs.ToList()
                       select new DimJob {
                         JobId = j.JobId,
                         JobName = j.Name,
                         UserId = j.OwnerUserId,
                         UserName = userManager.GetUserById(j.OwnerUserId).UserName
                       };

      db.DimJobs.InsertAllOnSubmit(newDimJobs);

      // insert NULL-Job
      if (!db.DimJobs.Any(x => x.JobId == Guid.Empty)) {
        db.DimJobs.InsertOnSubmit(new DimJob { JobId = Guid.Empty, JobName = "NULL", UserId = Guid.Empty, UserName = "NULL" });
      }
    }

    /// <summary>
    /// Expires the active <see cref="DimClient"/> rows whose slave no longer exists or whose
    /// name / parent group / grandparent group changed, and queues fresh rows for slaves
    /// that are new or were modified.
    /// </summary>
    /// <param name="db">Data context to query and to queue the changes on.</param>
    private void UpdateClients(HiveDataContext db) {
      // Active dimension rows without a matching slave resource.
      var removedClients = from c in db.DimClients
                           where c.ExpirationTime == null &&
                                 !db.Resources.OfType<Slave>().Select(x => x.ResourceId).Contains(c.ResourceId)
                           select c;

      // Active dimension rows whose slave differs in name or group assignment.
      var modifiedClients = from s in db.Resources.OfType<Slave>()
                            join c in db.DimClients on s.ResourceId equals c.ResourceId
                            where c.ExpirationTime == null &&
                                  (s.Name != c.Name ||
                                   s.ParentResourceId != c.ResourceGroupId ||
                                   s.ParentResource.ParentResourceId != c.ResourceGroup2Id)
                            select new { Slave = s, Client = c };

      // Read the clock once so every row expired in this run carries the same timestamp.
      var expirationTime = DateTime.Now;
      foreach (var client in removedClients.Union(modifiedClients.Select(x => x.Client))) {
        client.ExpirationTime = expirationTime;
      }

      // Slaves without any dimension row, plus modified slaves (they get a replacement row).
      var newClients = from s in db.Resources.OfType<Slave>()
                       where !db.DimClients.Select(x => x.ResourceId).Contains(s.ResourceId) ||
                             modifiedClients.Select(x => x.Slave.ResourceId).Contains(s.ResourceId)
                       select new {
                         Slave = s,
                         Group = s.ParentResourceId,
                         Group2 = s.ParentResource.ParentResourceId
                       };

      var newDimClients = from s in newClients.ToList()
                          select new DimClient {
                            ResourceId = s.Slave.ResourceId,
                            Name = s.Slave.Name,
                            ExpirationTime = null,
                            ResourceGroupId = s.Group,
                            ResourceGroup2Id = s.Group2
                          };

      db.DimClients.InsertAllOnSubmit(newDimClients);
    }

    /// <summary>Queues the client-info facts and the task facts for the given time slot.</summary>
    /// <param name="newTime">Time slot the facts are attached to.</param>
    /// <param name="db">Data context to query and to queue the changes on.</param>
    private void UpdateFactTables(DimTime newTime, HiveDataContext db) {
      UpdateClientInfoFacts(newTime, db);
      UpdateTaskFacts(newTime, db);
    }

    /// <summary>
    /// Queues one <see cref="FactClientInfo"/> per slave with an active dimension row,
    /// recording its current core/memory usage and attributing the time since its last
    /// fact to the slave's current state.
    /// </summary>
    /// <param name="newTime">Time slot the facts are attached to.</param>
    /// <param name="db">Data context to query and to queue the inserts on.</param>
    private void UpdateClientInfoFacts(DimTime newTime, HiveDataContext db) {
      var time = newTime.Time;

      // Most recent fact per resource (across all dimension rows of that resource).
      var lastFacts = from cf in db.FactClientInfos
                      join r in db.DimClients on cf.ClientId equals r.Id
                      group cf by r.ResourceId into grpFacts
                      select new {
                        ResourceId = grpFacts.Key,
                        Fact = grpFacts.OrderByDescending(x => x.Time).First(),
                      };

      // Left join: slaves without any previous fact get LastFact == null.
      var slaves = from s in db.Resources.OfType<Slave>()
                   join c in db.DimClients on s.ResourceId equals c.ResourceId
                   join lcf in lastFacts on c.ResourceId equals lcf.ResourceId into joinCf
                   from cf in joinCf.DefaultIfEmpty()
                   where c.ExpirationTime == null
                   select new {
                     Slave = s,
                     Client = c,
                     LastFact = cf != null ? cf.Fact : null
                   };

      var clientFacts = from s in slaves.ToList()
                        // Minutes since the previous fact; one slot length when there is none.
                        let duration = s.LastFact != null
                                       ? (time - s.LastFact.Time).TotalMinutes
                                       : SmallestTimeSpan.TotalMinutes
                        select new FactClientInfo {
                          DimClient = s.Client,
                          DimTime = newTime,
                          UserId = s.Slave.OwnerUserId ?? Guid.Empty,
                          NumUsedCores = s.Slave.Cores != null && s.Slave.FreeCores != null
                                         ? s.Slave.Cores.Value - s.Slave.FreeCores.Value
                                         : 0,
                          NumTotalCores = s.Slave.Cores ?? 0,
                          UsedMemory = s.Slave.Memory != null && s.Slave.FreeMemory != null
                                       ? s.Slave.Memory.Value - s.Slave.FreeMemory.Value
                                       : 0,
                          TotalMemory = s.Slave.Memory ?? 0,
                          CpuUtilization = s.Slave.CpuUtilization,
                          TrafficIn = 0,
                          TrafficOut = 0,
                          TotalTimeIdle = s.Slave.SlaveState == SlaveState.Idle && s.Slave.IsAllowedToCalculate ? duration : 0.0,
                          TotalTimeCalculating = s.Slave.SlaveState == SlaveState.Calculating ? duration : 0.0,
                          TotalTimeTransferring = 0.0,
                          TotalTimeUnavailable = s.Slave.SlaveState == SlaveState.Idle && !s.Slave.IsAllowedToCalculate ? duration : 0.0,
                          TotalTimeOffline = s.Slave.SlaveState == SlaveState.Offline ? duration : 0.0
                        };

      db.FactClientInfos.InsertAllOnSubmit(clientFacts);
    }

    /// <summary>
    /// Rebuilds the facts of all non-parent tasks that either have no fact yet or whose
    /// fact is still open (<c>EndTime == null</c>): the open facts are queued for deletion
    /// and replaced by facts recomputed from the tasks' state logs.
    /// </summary>
    /// <param name="newTime">Time slot used as start for new facts and as end for finished, failed or aborted tasks.</param>
    /// <param name="db">Data context to query and to queue the changes on.</param>
    private void UpdateTaskFacts(DimTime newTime, HiveDataContext db) {
      // old Task facts
      var oldFacts = from fact in db.FactTasks
                     where fact.EndTime == null
                     select fact;

      // query Task facts data
      var newFacts = from task in db.Tasks
                     where !task.IsParentTask &&
                           (!db.FactTasks.Select(fact => fact.TaskId).Contains(task.TaskId) ||
                            oldFacts.Select(fact => fact.TaskId).Contains(task.TaskId))
                     join lastFact in oldFacts on task.TaskId equals lastFact.TaskId into lastFactPerTask
                     from lastFact in lastFactPerTask.DefaultIfEmpty()
                     // Slave of the most recent state log that has a slave assigned.
                     let lastSlaveId = task.StateLogs.OrderByDescending(sl => sl.DateTime).First(sl => sl.SlaveId != null).SlaveId
                     join client in db.DimClients.Where(client => client.ExpirationTime == null) on lastSlaveId equals client.ResourceId into clientsPerSlaveId
                     from client in clientsPerSlaveId.DefaultIfEmpty()
                     select new {
                       Task = task,
                       TaskDataSize = task.JobData.DataSize,
                       StateLogs = task.StateLogs.OrderBy(sl => sl.DateTime),
                       LastClientId = client != null ? client.Id : default(Guid?),
                       LastFact = lastFact
                     };

      // new Task facts
      var newTaskFacts = from t in newFacts.ToList()
                         // A linked list gives each state log access to its successor.
                         let stateLogsLinkedList = new LinkedList<StateLog>(t.StateLogs)
                         select new FactTask {
                           TaskId = t.Task.TaskId,
                           JobId = t.Task.JobId,
                           DimTimeStart = t.LastFact != null ? t.LastFact.DimTimeStart : newTime,
                           DimTimeEnd = new[] { TaskState.Finished, TaskState.Failed, TaskState.Aborted }.Contains(t.Task.State) ? newTime : null,
                           LastClientId = t.LastClientId ?? (t.LastFact != null ? t.LastFact.LastClientId : Guid.Empty),
                           Priority = t.Task.Priority,
                           CoresRequired = t.Task.CoresNeeded,
                           MemoryRequired = t.Task.MemoryNeeded,
                           TaskSize = t.LastFact == null ? t.TaskDataSize : t.LastFact.TaskSize,
                           ResultSize = t.Task.State == TaskState.Finished ? t.TaskDataSize : default(long?),
                           NumCalculationRuns = stateLogsLinkedList.CountCalculationRuns(),
                           NumRetries = stateLogsLinkedList.CountRetries(),
                           TotalWaitingTime = stateLogsLinkedList.SumTotalTimeWhere(stateLog => stateLog.Value.State == TaskState.Waiting),
                           TotalRuntime = stateLogsLinkedList.SumTotalTimeWhere(stateLog => stateLog.Value.State == TaskState.Calculating && stateLog.NextIs(x => x.State == TaskState.Transferring)),
                           TotalTransferTime = stateLogsLinkedList.SumTotalTimeWhere(stateLog => stateLog.Value.State == TaskState.Transferring),
                           TaskState = t.Task.State
                         };

      db.FactTasks.DeleteAllOnSubmit(oldFacts.ToList());
      db.FactTasks.InsertAllOnSubmit(newTaskFacts);
    }
  }

  /// <summary>Aggregations over a chronologically ordered list of task state logs.</summary>
  public static class StateLogLinkedListExtensions {
    /// <summary>Counts the Calculating entries that are directly followed by a Transferring entry.</summary>
    /// <param name="stateLogs">State logs of one task.</param>
    /// <returns>The number of such entries.</returns>
    public static int CountCalculationRuns(this LinkedList<StateLog> stateLogs) {
      return stateLogs.EnumerateNodes()
                      .Count(sl => sl.Value.State == TaskState.Calculating && sl.NextIs(nsl => nsl.State == TaskState.Transferring));
    }

    /// <summary>
    /// Counts the Calculating entries that have a successor which is not a Transferring entry.
    /// A trailing Calculating entry (no successor) is not counted.
    /// </summary>
    /// <param name="stateLogs">State logs of one task.</param>
    /// <returns>The number of such entries.</returns>
    public static int CountRetries(this LinkedList<StateLog> stateLogs) {
      // NextIs already returns false when there is no successor.
      return stateLogs.EnumerateNodes()
                      .Count(sl => sl.Value.State == TaskState.Calculating && sl.NextIs(nsl => nsl.State != TaskState.Transferring));
    }

    /// <summary>
    /// Sums, in minutes, the time between each entry matching <paramref name="predicate"/>
    /// and its successor. A matching entry without successor contributes 0.
    /// </summary>
    /// <param name="stateLogs">State logs of one task.</param>
    /// <param name="predicate">Selects the nodes whose duration is summed.</param>
    /// <returns>The total duration in minutes.</returns>
    public static double SumTotalTimeWhere(this LinkedList<StateLog> stateLogs, Predicate<LinkedListNode<StateLog>> predicate) {
      return stateLogs.EnumerateNodes()
                      .Where(stateLog => predicate(stateLog))
                      .Sum(stateLog => stateLog.Next != null ? (stateLog.Next.Value.DateTime - stateLog.Value.DateTime).TotalMinutes : 0.0);
    }
  }

  /// <summary>Node-level helpers for <see cref="LinkedList{T}"/>.</summary>
  public static class LinkedListExtensions {
    /// <summary>Lazily yields the nodes of <paramref name="list"/> from first to last.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="list">List whose nodes are enumerated.</param>
    /// <returns>The nodes in list order; empty for an empty list.</returns>
    public static IEnumerable<LinkedListNode<T>> EnumerateNodes<T>(this LinkedList<T> list) {
      var node = list.First;
      while (node != null) {
        yield return node;
        node = node.Next;
      }
    }

    /// <summary>Tests whether <paramref name="node"/> has a successor whose value satisfies <paramref name="predicate"/>.</summary>
    /// <typeparam name="T">Element type of the list.</typeparam>
    /// <param name="node">Node whose successor is inspected.</param>
    /// <param name="predicate">Condition applied to the successor's value.</param>
    /// <returns><c>false</c> if there is no successor; otherwise the predicate's result.</returns>
    public static bool NextIs<T>(this LinkedListNode<T> node, Predicate<T> predicate) {
      return node.Next != null && predicate(node.Next.Value);
    }
  }
}