using System;
using System.Linq;
using System.Transactions;
using HeuristicLab.Services.Access;
using HeuristicLab.Services.Hive.DataAccess;

namespace HeuristicLab.Services.Hive {
  /// <summary>
  /// Fills the statistics dimension tables (time, users, jobs, clients) and, whenever a new
  /// time entry was created, the fact tables (client infos, tasks) of the Hive database.
  /// </summary>
  public class HiveStatisticsGenerator : IStatisticsGenerator {
    private IUserManager userManager { get { return ServiceLocator.Instance.UserManager; } }

    /// <summary>Granularity of the time dimension; a new time entry is created at most once per span.</summary>
    private static readonly TimeSpan SmallestTimeSpan = new TimeSpan(0, 5, 0);

    /// <summary>
    /// Updates the dimension tables and, if a new time entry was created, the fact tables.
    /// All changes are submitted inside one ReadCommitted transaction scope.
    /// </summary>
    public void GenerateStatistics() {
      using (var db = new HiveDataContext(Settings.Default.HeuristicLab_Hive_LinqConnectionString))
      using (var transaction = new TransactionScope(TransactionScopeOption.Required, new TransactionOptions { IsolationLevel = IsolationLevel.ReadCommitted })) {
        var newTime = UpdateDimensionTables(db);
        // Submit the dimension rows first; the fact queries below read the dimension tables from the database.
        db.SubmitChanges();

        if (newTime != null) {
          UpdateFactTables(newTime, db);
          db.SubmitChanges();
        }

        transaction.Complete();
      }
    }

    /// <summary>
    /// Updates all dimension tables.
    /// </summary>
    /// <returns>The newly created time entry, or null if no new time entry was due.</returns>
    private DimTime UpdateDimensionTables(HiveDataContext db) {
      var newTime = UpdateTime(db);
      // Update other tables out of sync with time dimension?
      UpdateUsers(db);
      UpdateJobs(db);
      UpdateClients(db);
      return newTime;
    }

    /// <summary>
    /// Queues a new time entry for insertion if the table is empty or the latest entry is
    /// older than <see cref="SmallestTimeSpan"/>.
    /// </summary>
    /// <returns>The queued time entry, or null if none was created.</returns>
    private DimTime UpdateTime(HiveDataContext db) {
      var lastUpdateTime =
        (from t in db.DimTimes
         orderby t.Time descending
         select t.Time)
        .FirstOrDefault();

      var now = DateTime.Now;
      DimTime newTime = null;

      // default(DateTime) is what FirstOrDefault yields for an empty table.
      if (lastUpdateTime == default(DateTime) || lastUpdateTime + SmallestTimeSpan < now) {
        newTime = new DimTime {
          // Round the minute down to a multiple of SmallestTimeSpan.Minutes.
          Time = new DateTime(now.Year, now.Month, now.Day, now.Hour, now.Minute - now.Minute % SmallestTimeSpan.Minutes, 0),
          Hour = new DateTime(now.Year, now.Month, now.Day, now.Hour, 0, 0),
          Day = new DateTime(now.Year, now.Month, now.Day, 0, 0, 0),
          Month = new DateTime(now.Year, now.Month, 1, 0, 0, 0),
          Year = new DateTime(now.Year, 1, 1, 0, 0, 0)
        };
        db.DimTimes.InsertOnSubmit(newTime);
      }

      return newTime;
    }

    /// <summary>
    /// Queues a user dimension row for every owner of a resource or job that has none yet,
    /// and makes sure the placeholder row with <see cref="Guid.Empty"/> exists.
    /// </summary>
    private void UpdateUsers(HiveDataContext db) {
      var newUsers =
        from u in db.Resources.Where(x => x.OwnerUserId != null).Select(x => x.OwnerUserId.Value).Union(db.Jobs.Select(x => x.OwnerUserId))
        where !db.DimUsers.Select(x => x.UserId).Contains(u)
        select u;

      // NOTE(review): the result of GetUserById is dereferenced without a null check -
      // confirm it cannot return null (e.g. for ids of users that no longer exist).
      var newDimUsers =
        from u in newUsers.ToList()
        select new DimUser {
          UserId = u,
          Name = userManager.GetUserById(u).UserName
        };

      db.DimUsers.InsertAllOnSubmit(newDimUsers);

      // insert NULL-User
      if (!db.DimUsers.Any(x => x.UserId == Guid.Empty)) {
        db.DimUsers.InsertOnSubmit(new DimUser { UserId = Guid.Empty, Name = "NULL" });
      }
    }

    /// <summary>
    /// Queues a job dimension row for every job that has none yet, and makes sure the
    /// placeholder row with <see cref="Guid.Empty"/> exists.
    /// </summary>
    private void UpdateJobs(HiveDataContext db) {
      var newJobs =
        from j in db.Jobs
        where !db.DimJobs.Select(x => x.JobId).Contains(j.JobId)
        select j;

      // NOTE(review): the result of GetUserById is dereferenced without a null check -
      // confirm it cannot return null (e.g. for ids of users that no longer exist).
      var newDimJobs =
        from j in newJobs.ToList()
        select new DimJob {
          JobId = j.JobId,
          JobName = j.Name,
          UserId = j.OwnerUserId,
          UserName = userManager.GetUserById(j.OwnerUserId).UserName
        };

      db.DimJobs.InsertAllOnSubmit(newDimJobs);

      // insert NULL-Job
      if (!db.DimJobs.Any(x => x.JobId == Guid.Empty)) {
        db.DimJobs.InsertOnSubmit(new DimJob { JobId = Guid.Empty, JobName = "NULL", UserId = Guid.Empty, UserName = "NULL" });
      }
    }

    /// <summary>
    /// Expires client dimension rows whose slave was removed or changed (name, parent group or
    /// grandparent group) and queues fresh rows for new and changed slaves.
    /// </summary>
    private void UpdateClients(HiveDataContext db) {
      // Active client rows without a matching slave resource.
      var removedClients =
        from c in db.DimClients
        where c.ExpirationTime == null
              && !db.Resources.OfType<Slave>().Select(x => x.ResourceId).Contains(c.ResourceId)
        select c;

      // Active client rows whose slave differs in name or group assignment.
      var modifiedClients =
        from s in db.Resources.OfType<Slave>()
        join c in db.DimClients on s.ResourceId equals c.ResourceId
        where c.ExpirationTime == null
              && (s.Name != c.Name
                  || s.ParentResourceId != c.ResourceGroupId
                  || s.ParentResource.ParentResourceId != c.ResourceGroup2Id)
        select new { Slave = s, Client = c };

      // Read the clock once so that all rows expired in this run carry the same timestamp.
      var expirationTime = DateTime.Now;
      foreach (var client in removedClients.Union(modifiedClients.Select(x => x.Client))) {
        client.ExpirationTime = expirationTime;
      }

      // The expirations above are not submitted yet, so modifiedClients still matches the
      // same rows when this query is executed.
      var newClients =
        from s in db.Resources.OfType<Slave>()
        where !db.DimClients.Select(x => x.ResourceId).Contains(s.ResourceId)
              || modifiedClients.Select(x => x.Slave.ResourceId).Contains(s.ResourceId)
        select new {
          Slave = s,
          Group = s.ParentResourceId,
          Group2 = s.ParentResource.ParentResourceId
        };

      var newDimClients =
        from s in newClients.ToList()
        select new DimClient {
          ResourceId = s.Slave.ResourceId,
          Name = s.Slave.Name,
          ExpirationTime = null,
          ResourceGroupId = s.Group,
          ResourceGroup2Id = s.Group2
        };

      db.DimClients.InsertAllOnSubmit(newDimClients);
    }

    /// <summary>
    /// Updates all fact tables for the given time entry.
    /// </summary>
    private void UpdateFactTables(DimTime newTime, HiveDataContext db) {
      UpdateClientInfoFacts(newTime, db);
      UpdateTaskFacts(newTime, db);
    }

    /// <summary>
    /// Queues one client info fact per slave with an active client dimension row, carrying the
    /// accumulated state times forward from the slave's latest previous fact.
    /// </summary>
    private void UpdateClientInfoFacts(DimTime newTime, HiveDataContext db) {
      // Latest fact per resource, across all client dimension rows of that resource.
      var lastFacts =
        from cf in db.FactClientInfos
        join r in db.DimClients on cf.ClientId equals r.Id
        group cf by r.ResourceId into grpFacts
        select new {
          ResourceId = grpFacts.Key,
          Fact = grpFacts.OrderByDescending(x => x.Time).First()
        };

      // Left join: slaves without any previous fact get LastFact = null.
      var slaves =
        from s in db.Resources.OfType<Slave>()
        join c in db.DimClients on s.ResourceId equals c.ResourceId
        join lcf in lastFacts on c.ResourceId equals lcf.ResourceId into joinCf
        from cf in joinCf.DefaultIfEmpty()
        where c.ExpirationTime == null
        select new {
          Slave = s,
          Client = c,
          LastFact = cf != null ? cf.Fact : null
        };

      var clientFacts =
        from s in slaves.ToList()
        select new FactClientInfo {
          DimClient = s.Client,
          DimTime = newTime,
          UserId = s.Slave.OwnerUserId ?? Guid.Empty,
          NumUsedCores = s.Slave.Cores != null && s.Slave.FreeCores != null
            ? s.Slave.Cores.Value - s.Slave.FreeCores.Value
            : 0,
          NumTotalCores = s.Slave.Cores ?? 0,
          UsedMemory = s.Slave.Memory != null && s.Slave.FreeMemory != null
            ? s.Slave.Memory.Value - s.Slave.FreeMemory.Value
            : 0,
          TotalMemory = s.Slave.Memory ?? 0,
          CpuUtilization = s.Slave.CpuUtilization,
          TrafficIn = 0,
          TrafficOut = 0,
          TotalTimeIdle = CalcNewTotalTime(s.LastFact, newTime.Time,
            x => x.TotalTimeIdle,
            () => s.Slave.SlaveState == SlaveState.Idle && s.Slave.IsAllowedToCalculate),
          TotalTimeCalculating = CalcNewTotalTime(s.LastFact, newTime.Time,
            x => x.TotalTimeCalculating,
            () => s.Slave.SlaveState == SlaveState.Calculating),
          TotalTimeTransferring = 0.0,
          TotalTimeUnavailable = CalcNewTotalTime(s.LastFact, newTime.Time,
            x => x.TotalTimeUnavailable,
            () => s.Slave.SlaveState == SlaveState.Idle && !s.Slave.IsAllowedToCalculate),
          TotalTimeOffline = CalcNewTotalTime(s.LastFact, newTime.Time,
            x => x.TotalTimeOffline,
            () => s.Slave.SlaveState == SlaveState.Offline)
        };

      db.FactClientInfos.InsertAllOnSubmit(clientFacts);
    }

    /// <summary>
    /// Computes the new value of an accumulated time column.
    /// </summary>
    /// <param name="lastFact">The previous fact, or null if there is none.</param>
    /// <param name="newTime">The time of the fact being created.</param>
    /// <param name="selector">Reads the accumulated value from the previous fact.</param>
    /// <param name="condition">Tells whether the elapsed time counts towards this column.</param>
    /// <returns>
    /// 0.0 if there is no previous fact; otherwise the previous value, increased by the minutes
    /// elapsed since the previous fact when <paramref name="condition"/> returns true.
    /// </returns>
    private double CalcNewTotalTime(FactClientInfo lastFact, DateTime newTime, Func<FactClientInfo, double> selector, Func<bool> condition) {
      if (lastFact == null) {
        return 0.0;
      }
      return condition()
        ? selector(lastFact) + (newTime - lastFact.Time).TotalMinutes
        : selector(lastFact);
    }

    /// <summary>
    /// Queues one task fact per non-parent task, built from the task's latest previous fact
    /// (if any) and the state logs recorded after it and before the new time entry.
    /// </summary>
    private void UpdateTaskFacts(DimTime newTime, HiveDataContext db) {
      // Latest fact per task.
      var lastFacts =
        from fact in db.FactTasks
        group fact by fact.TaskId into factsPerTask
        select factsPerTask.OrderByDescending(x => x.EndTime).First();

      var lastFactAndStateLogsAndLastClientPerTask =
        from task in db.Tasks.Where(task => !task.IsParentTask)
        join lastFact in lastFacts on task.TaskId equals lastFact.TaskId into lastFactsPerTask
        from lastFact in lastFactsPerTask.DefaultIfEmpty()
        // State logs not yet covered by a fact and older than the new time entry.
        let newStateLogs = task.StateLogs.Where(stateLog => (lastFact == null || stateLog.DateTime > lastFact.EndTime) && stateLog.DateTime < newTime.Time)
        // Slave of the most recent new state log that has a slave assigned.
        let lastSlaveId = newStateLogs.OrderByDescending(stateLog => stateLog.DateTime).First(stateLog => stateLog.SlaveId != null).SlaveId
        join client in db.DimClients.Where(client => client.ExpirationTime == null) on lastSlaveId equals client.ResourceId into clientPerTask
        from client in clientPerTask.DefaultIfEmpty()
        select new {
          Task = task,
          TaskDataSize = task.JobData.DataSize,
          LastFact = lastFact,
          StateLogs = newStateLogs.OrderBy(stateLog => stateLog.DateTime),
          LastClientId = client != null ? client.Id : default(Guid?)
        };

      var newTaskFacts =
        from t in lastFactAndStateLogsAndLastClientPerTask.ToList()
        select new FactTask {
          TaskId = t.Task.TaskId,
          JobId = t.Task.JobId,
          DimTimeStart = t.LastFact != null ? t.LastFact.DimTimeStart : newTime,
          DimTimeEnd = newTime,
          // Fall back to the client of the previous fact, then to the Guid.Empty placeholder.
          LastClientId = t.LastClientId ?? (t.LastFact != null ? t.LastFact.LastClientId : Guid.Empty),
          Priority = t.Task.Priority,
          CoresRequired = t.Task.CoresNeeded,
          MemoryRequired = t.Task.MemoryNeeded,
          TaskSize = t.LastFact != null ? t.LastFact.TaskSize : t.TaskDataSize,
          ResultSize = t.Task.State == TaskState.Finished ? t.TaskDataSize : default(long?),
          NumCalculationRuns = (t.LastFact != null ? t.LastFact.NumCalculationRuns : 0)
                               + t.StateLogs.Count(x => x.State == TaskState.Calculating),
          NumFails = (t.LastFact != null ? t.LastFact.NumFails : 0)
                     + t.StateLogs.Count(x => x.State == TaskState.Failed || !string.IsNullOrEmpty(x.Exception)),
          TotalWaitingTime = 0, //ToDo
          TotalRuntime = 0, //ToDo
          TotalTransferTime = 0 //ToDo
        };

      db.FactTasks.InsertAllOnSubmit(newTaskFacts);
    }
  }
}