Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/29/20 13:28:25 (4 years ago)
Author:
jkarder
Message:

#3062: overhauled statistics generation and cleanup

  • switched to a single thread for database cleanup and statistics generation (executed sequentially)
  • switched to preemptive deletion of items that are in status DeletionPending (for jobs: statelogs, taskdata, tasks)
  • added code that aborts tasks whose jobs have already been marked for deletion
  • added method UseTransactionAndSubmit in addition to UseTransaction in PersistenceManager
  • updated DAO methods and introduced more bare metal sql
  • introduced DAO methods for batch deletion
  • fixed usage of enum values in DAO sql queries
  • deleted unnecessary triggers tr_JobDeleteCascade and tr_TaskDeleteCascade in Prepare Hive Database.sql
  • changed scheduling for less interference with janitor and other heartbeats
    • increased scheduling patience from 20 to 70 seconds (to wait longer to get the mutex for scheduling)
    • changed signature of ITaskScheduler.Schedule
    • added base class for TaskSchedulers and moved assignment of tasks to slaves into it
    • changed RoundRobinTaskScheduler to use bare metal sql
  • made MessageContainer a storable type (leftover)
  • updated HiveJanitorServiceInstaller.nsi
Location:
trunk/HeuristicLab.Services.Hive.DataAccess/3.3
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Daos/HiveStatistics/DimJobDao.cs

    r17180 r17574  
    4444    }
    4545
     46    public void UpdateExistingDimJobs() {
     47      DataContext.ExecuteCommand(UpdateExistingDimJobsQuery);
     48    }
     49
    4650    #region Compiled queries
    4751    private static readonly Func<DataContext, Guid, DimJob> GetByIdQuery =
     
    5155         select dimJob).SingleOrDefault());
    5256    #endregion
     57
     58    #region String queries
     59    private const string UpdateExistingDimJobsQuery = @"
     60UPDATE u
     61SET
     62  u.JobName = case when x.JobId is null then u.JobName else x.JobName end,
     63  u.TotalTasks = x.TotalTasks,
     64  u.CompletedTasks = x.CompletedTasks,
     65  u.DateCompleted =
     66    case when x.totaltasks = x.CompletedTasks
     67      then (case when x.JobId is null and x.DateCompleted is null then GETDATE() else x.DateCompleted end)
     68    else u.DateCompleted
     69  end,
     70  u.ProjectId = case when x.JobId is null then u.ProjectId else x.ProjectId end
     71FROM [statistics].dimjob u
     72JOIN (
     73  SELECT
     74    dj.JobId as DimJobId,
     75    j.JobId as JobId,
     76    j.Name as JobName,
     77    COUNT(*) as TotalTasks,
     78    SUM(
     79    CASE
     80    WHEN TaskState in ('Finished', 'Aborted', 'Failed') then 1
     81    ELSE 0
     82    END) as CompletedTasks,
     83    MAX(EndTime) as DateCompleted,
     84    dp.ProjectId as ProjectId
     85  from [statistics].DimJob dj
     86  join [statistics].FactTask ft on dj.JobId = ft.JobId
     87  left join Job j on j.JobId = dj.JobId
     88  left join [statistics].DimProject dp on j.ProjectId = dp.ProjectId
     89  where dj.DateCompleted is null and dp.DateExpired is null
     90  group by dj.JobId, j.JobId, j.Name, dp.ProjectId
     91) as x on u.JobId = x.DimJobId";
     92    #endregion
    5393  }
    5494}
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Daos/JobDao.cs

    r17180 r17574  
    3737    }
    3838
     39    public int DeleteByState(JobState state, int batchSize) {
     40      return DataContext.ExecuteCommand(DeleteTopNByStateQueryString, batchSize, Enum.GetName(typeof(JobState), state));
     41    }
     42
    3943    public IEnumerable<Job> GetByProjectId(Guid id) {
    4044      return GetByProjectIdQuery(DataContext, id);
     
    4347    public IEnumerable<Job> GetByProjectIds(IEnumerable<Guid> projectIds) {
    4448      string paramProjectIds = string.Join(",", projectIds.ToList().Select(x => string.Format("'{0}'", x)));
    45       if(!string.IsNullOrWhiteSpace(paramProjectIds)) {
     49      if (!string.IsNullOrWhiteSpace(paramProjectIds)) {
    4650        string queryString = string.Format(GetByProjectIdsQueryString, paramProjectIds);
    4751        return DataContext.ExecuteQuery<Job>(queryString);
     
    5155
    5256    public IEnumerable<Job> GetByState(JobState state) {
    53       return GetByStateQuery(DataContext, state);
     57      return GetByStateQuery(DataContext, state.ToString());
    5458    }
    5559
    5660    public IEnumerable<Guid> GetJobIdsByState(JobState state) {
    57       return GetJobIdsByStateQuery(DataContext, state);
     61      return GetJobIdsByStateQuery(DataContext, state.ToString());
    5862    }
    5963
     
    7478         where job.ProjectId == projectId
    7579         select job));
    76     private static readonly Func<DataContext, JobState, IEnumerable<Job>> GetByStateQuery =
    77       CompiledQuery.Compile((DataContext db, JobState jobState) =>
     80    private static readonly Func<DataContext, string, IEnumerable<Job>> GetByStateQuery =
     81      CompiledQuery.Compile((DataContext db, string jobState) =>
    7882        (from job in db.GetTable<Job>()
    79          where job.State == jobState
     83         where job.State.ToString() == jobState
    8084         select job));
    81     private static readonly Func<DataContext, JobState, IEnumerable<Guid>> GetJobIdsByStateQuery =
    82       CompiledQuery.Compile((DataContext db, JobState jobState) =>
     85    private static readonly Func<DataContext, string, IEnumerable<Guid>> GetJobIdsByStateQuery =
     86      CompiledQuery.Compile((DataContext db, string jobState) =>
    8387        (from job in db.GetTable<Job>()
    84          where job.State == jobState
     88         where job.State.ToString() == jobState
    8589         select job.JobId));
    8690    private static readonly Func<DataContext, IEnumerable<Job>> GetJobsReadyForDeletionQuery =
     
    101105      WHERE JobState = {0}
    102106    ";
     107    private const string DeleteTopNByStateQueryString = @"
     108      DELETE TOP ({0})
     109      FROM [Job]
     110      WHERE JobState = {1}
     111    ";
    103112    private const string GetStatisticsPendingJobs = @"
    104113      SELECT DISTINCT j.*
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Daos/StateLogDao.cs

    r17180 r17574  
    4040    }
    4141
     42    public int DeleteObsolete(int batchSize) {
     43      return DataContext.ExecuteCommand(DeleteObsoleteQueryString, batchSize);
     44    }
     45
    4246    #region Compiled queries
    4347    private static readonly Func<DataContext, Guid, StateLog> GetByIdQuery =
     
    5458         select stateLog).First(x => x.SlaveId != null));
    5559    #endregion
     60
     61    #region String queries
     62    private const string DeleteObsoleteQueryString = @"
     63delete top ({0}) s
     64from statelog s
     65  join task t on t.taskid = s.taskid
     66  join job j on j.jobid = t.jobid
     67where j.jobstate = 'deletionpending'
     68    ";
     69    #endregion
    5670  }
    5771}
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Daos/TaskDao.cs

    r17180 r17574  
    5050      //Because there is at the moment no case where this makes sense (there don't exist parent tasks which need to be calculated),
    5151      //we skip this step because it's wasted runtime
    52       return DataContext.ExecuteQuery<TaskPriorityInfo>(GetWaitingTasksQueryString, 
    53         slave.ResourceId, 
    54         Enum.GetName(typeof(TaskState), TaskState.Waiting), 
    55         slave.FreeCores, 
     52      return DataContext.ExecuteQuery<TaskPriorityInfo>(GetWaitingTasksQueryString,
     53        slave.ResourceId,
     54        Enum.GetName(typeof(TaskState), TaskState.Waiting),
     55        slave.FreeCores,
    5656        slave.FreeMemory).ToList();
    5757    }
     
    6565    /// <returns></returns>
    6666    public IEnumerable<Task> GetParentTasks(IEnumerable<Guid> resourceIds, int count, bool finished) {
    67     var query = from t in Table
    68                 where t.State == TaskState.Waiting
    69                     && t.IsParentTask
    70                     && t.Job.AssignedJobResources.All(x => resourceIds.ToList().Contains(x.ResourceId))
    71                     && t.FinishWhenChildJobsFinished == finished
    72                     && t.ChildJobs.Any()
    73                     && t.ChildJobs.All(x =>
    74                       x.State == TaskState.Finished
    75                       || x.State == TaskState.Aborted
    76                       || x.State == TaskState.Failed)
     67      var query = from t in Table
     68                  where t.State == TaskState.Waiting
     69                      && t.IsParentTask
     70                      && t.Job.AssignedJobResources.All(x => resourceIds.ToList().Contains(x.ResourceId))
     71                      && t.FinishWhenChildJobsFinished == finished
     72                      && t.ChildJobs.Any()
     73                      && t.ChildJobs.All(x =>
     74                        x.State == TaskState.Finished
     75                        || x.State == TaskState.Aborted
     76                        || x.State == TaskState.Failed)
    7777                  orderby t.Priority descending
    7878                  select t;
     
    8282    public void UpdateExecutionTime(Guid taskId, double executionTime) {
    8383      DataContext.ExecuteCommand(UpdateExecutionTimeQuery, executionTime, DateTime.Now, taskId);
     84    }
     85
     86    public int DeleteObsolete(int batchSize) {
     87      return DataContext.ExecuteCommand(DeleteObsoleteQueryString, batchSize);
    8488    }
    8589
     
    129133       WHERE TaskId = {2}
    130134    ";
     135
     136    private const string DeleteObsoleteQueryString = @"
     137delete top ({0}) t1
     138from task t1
     139  left join task t2 on t1.taskid = t2.parenttaskid
     140  join job j on j.jobid = t1.jobid
     141where j.jobstate = 'deletionpending' and t2.taskid is null
     142";
    131143    #endregion
    132144  }
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Daos/TaskDataDao.cs

    r17180 r17574  
    3232    }
    3333
     34    public int DeleteObsolete(int batchSize) {
     35      return DataContext.ExecuteCommand(DeleteObsoleteQueryString, batchSize);
     36    }
     37
    3438    #region Compiled queries
    3539    private static readonly Func<DataContext, Guid, TaskData> GetByIdQuery =
     
    3943         select taskData).SingleOrDefault());
    4044    #endregion
     45
     46    #region String queries
     47    private const string DeleteObsoleteQueryString = @"
     48delete top ({0}) td
     49from taskdata td
     50  join task t on t.taskid = td.taskid
     51  join job j on j.jobid = t.jobid
     52where j.jobstate = 'deletionpending'
     53    ";
     54    #endregion
    4155  }
    4256}
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Interfaces/IPersistenceManager.cs

    r17180 r17574  
    6464    #region Transaction management
    6565    void UseTransaction(Action call, bool repeatableRead = false, bool longRunning = false);
     66    void UseTransactionAndSubmit(Action call, bool repeatableRead = false, bool longRunning = false);
    6667    T UseTransaction<T>(Func<T> call, bool repeatableRead = false, bool longRunning = false);
     68    T UseTransactionAndSubmit<T>(Func<T> call, bool repeatableRead = false, bool longRunning = false);
    6769    #endregion
    6870
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/Manager/PersistenceManager.cs

    r17180 r17574  
    184184    #region Transaction management
    185185    public void UseTransaction(Action call, bool repeatableRead = false, bool longRunning = false) {
    186       UseTransaction<object>(() => {
    187         call();
    188         return null;
    189       });
     186      UseTransaction<object>(() => { call(); return null; }, repeatableRead, longRunning);
     187    }
     188
     189    public void UseTransactionAndSubmit(Action call, bool repeatableRead = false, bool longRunning = false) {
     190      UseTransaction(() => { call(); SubmitChanges(); }, repeatableRead, longRunning);
    190191    }
    191192
     
    211212    }
    212213
     214    public T UseTransactionAndSubmit<T>(Func<T> call, bool repeatableRead = false, bool longRunning = false) {
     215      return UseTransaction(() => { var res = call(); SubmitChanges(); return res; }, repeatableRead, longRunning);
     216    }
     217
    213218    private static TransactionScope CreateTransaction(bool repeatableRead, bool longRunning) {
    214219      var options = new TransactionOptions {
  • trunk/HeuristicLab.Services.Hive.DataAccess/3.3/SQL Scripts/Prepare Hive Database.sql

    r16565 r17574  
    141141INCLUDE ([TaskId],[ExecutionTimeMs],[LastHeartbeat],[ParentTaskId],[Priority],[Command],[JobId])
    142142GO
    143 
    144 
    145 
    146 -- OBSOLETE - DO NOT PERFORM (start)
    147 /****** Object:  Trigger [dbo].[tr_JobDeleteCascade]    Script Date: 04/19/2011 16:31:53 ******/
    148 SET ANSI_NULLS ON
    149 GO
    150 SET QUOTED_IDENTIFIER ON
    151 GO
    152 -- =============================================
    153 -- Author:    cneumuel
    154 -- Create date: 19.04.2011
    155 -- Description: (1) Writes the execution times of deleted jobs into DeletedJobStats to ensure correct statistics
    156 --        (2) Deletes all associated jobs. This cannot be done with cascading delete,
    157 --              because the job table defines a INSTEAD OF DELETE trigger itself, which
    158 --              is not compatible with cascading deletes.
    159 -- =============================================
    160 CREATE TRIGGER [dbo].[tr_JobDeleteCascade] ON [dbo].[Job] INSTEAD OF DELETE AS
    161 BEGIN
    162     DELETE Task FROM deleted, Task WHERE deleted.JobId = Task.JobId
    163     DELETE Job FROM deleted, Job WHERE deleted.JobId = Job.JobId
    164 END
    165 GO
    166 
    167 -- =============================================
    168 -- Author:    cneumuel
    169 -- Create date: 11.11.2010
    170 -- Description: Recursively deletes all child-jobs of a job when it is deleted. (Source: http://devio.wordpress.com/2008/05/23/recursive-delete-in-sql-server/)
    171 -- =============================================DeletedJobStatistics
    172 CREATE TRIGGER [dbo].[tr_TaskDeleteCascade] ON [dbo].[Task] INSTEAD OF DELETE AS
    173 BEGIN
    174     -- recursively delete jobs
    175     CREATE TABLE #Table(
    176         TaskId uniqueidentifier
    177     )
    178     INSERT INTO #Table (TaskId)
    179     SELECT TaskId FROM deleted
    180    
    181     DECLARE @c INT
    182     SET @c = 0
    183    
    184     WHILE @c <> (SELECT COUNT(TaskId) FROM #Table) BEGIN
    185         SELECT @c = COUNT(TaskId) FROM #Table
    186        
    187         INSERT INTO #Table (TaskId)
    188             SELECT Task.TaskId
    189             FROM Task
    190             LEFT OUTER JOIN #Table ON Task.TaskId = #Table.TaskId
    191             WHERE Task.ParentTaskId IN (SELECT TaskId FROM #Table)
    192                 AND #Table.TaskId IS NULL
    193     END
    194    
    195     DELETE TaskData FROM TaskData INNER JOIN #Table ON TaskData.TaskId = #Table.TaskId
    196     DELETE Task FROM Task INNER JOIN #Table ON Task.TaskId = #Table.TaskId
    197 END
    198 GO
    199 -- OBSOLETE (end)
    200 
Note: See TracChangeset for help on using the changeset viewer.