- Timestamp: 07/03/14 16:03:14 (10 years ago)
- Location: trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3
- Files: 9 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Core.cs
r9456  r11082
  204     204    TS.Task.Factory.StartNew(HandleCalculateTask, jobId)
  205     205      .ContinueWith((t) => {
  206          -     SlaveStatusInfo.IncrementExceptionOccured();
          206  +     SlaveStatusInfo.IncrementTasksFailed();
  207     207        SlaveClientCom.Instance.LogMessage(t.Exception.ToString());
  208     208      }, TaskContinuationOptions.OnlyOnFaulted);
    …       …
  212     212    TS.Task.Factory.StartNew(HandleStopTask, jobId)
  213     213      .ContinueWith((t) => {
  214          -     SlaveStatusInfo.IncrementExceptionOccured();
          214  +     SlaveStatusInfo.IncrementTasksFailed();
  215     215        SlaveClientCom.Instance.LogMessage(t.Exception.ToString());
  216     216      }, TaskContinuationOptions.OnlyOnFaulted);
    …       …
  220     220    TS.Task.Factory.StartNew(HandlePauseTask, jobId)
  221     221      .ContinueWith((t) => {
  222          -     SlaveStatusInfo.IncrementExceptionOccured();
          222  +     SlaveStatusInfo.IncrementTasksFailed();
  223     223        SlaveClientCom.Instance.LogMessage(t.Exception.ToString());
  224     224      }, TaskContinuationOptions.OnlyOnFaulted);
    …       …
  228     228    TS.Task.Factory.StartNew(HandleAbortTask, jobId)
  229     229      .ContinueWith((t) => {
  230          -     SlaveStatusInfo.IncrementExceptionOccured();
          230  +     SlaveStatusInfo.IncrementTasksFailed();
  231     231        SlaveClientCom.Instance.LogMessage(t.Exception.ToString());
  232     232      }, TaskContinuationOptions.OnlyOnFaulted);
    …       …
  328     328      this.taskManager.TaskStopped += new EventHandler<EventArgs<SlaveTask, TaskData>>(taskManager_TaskStopped);
  329     329      this.taskManager.TaskFailed += new EventHandler<EventArgs<Tuple<SlaveTask, TaskData, Exception>>>(taskManager_TaskFailed);
  330          -   this.taskManager.ExceptionOccured += new EventHandler<EventArgs<SlaveTask, Exception>>(taskManager_ExceptionOccured);
  331     330      this.taskManager.TaskAborted += new EventHandler<EventArgs<SlaveTask>>(taskManager_TaskAborted);
  332     331    }
    …       …
  391     390      }
  392     391      catch (TaskNotFoundException ex) {
  393          -     SlaveStatusInfo.IncrementExceptionOccured();
          392  +     SlaveStatusInfo.IncrementTasksFailed();
  394     393        SlaveClientCom.Instance.LogMessage(ex.ToString());
  395     394      }
  396     395      catch (Exception ex) {
  397          -     SlaveStatusInfo.IncrementExceptionOccured();
          396  +     SlaveStatusInfo.IncrementTasksFailed();
  398     397        SlaveClientCom.Instance.LogMessage(ex.ToString());
  399     398      }
  400          - }
  401          -
  402          - private void taskManager_ExceptionOccured(object sender, EventArgs<SlaveTask, Exception> e) {
  403          -   SlaveStatusInfo.DecrementUsedCores(e.Value.CoresNeeded);
  404          -   SlaveStatusInfo.IncrementExceptionOccured();
  405          -   heartbeatManager.AwakeHeartBeatThread();
  406          -   SlaveClientCom.Instance.LogMessage(string.Format("Exception occured for task {0}: {1}", e.Value.TaskId, e.Value2.ToString()));
  407          -   wcfService.UpdateJobState(e.Value.TaskId, TaskState.Waiting, e.Value2.ToString());
  408     399    }
  409     400
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Executor.cs
r9456  r11082
   85      85        task.Start();
   86      86        if (!startTaskSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
   87          -       taskDataInvalid = true;
   88      87          throw new TimeoutException("Timeout when starting the task. TaskStarted event was not fired.");
   89      88        }
   90      89      }
   91      90      catch (Exception e) {
   92          -     this.CurrentException = e;
   93          -     taskDataInvalid = true;
   94          -     Task_TaskFailed(this, new EventArgs<Exception>(e));
   95          -   } finally {
           91  +     HandleStartStopPauseError(e);
           92  +   }
           93  +   finally {
   96      94        taskStartedSem.Set();
   97      95      }
    …       …
  103     101    taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
  104     102    if (task == null) {
  105          -   CurrentException = new Exception("Pausing task " + this.TaskId + ": Task is null");
  106          -   executorQueue.AddMessage(ExecutorMessageType.ExceptionOccured);
          103  +   HandleStartStopPauseError(new Exception("Pausing task " + this.TaskId + ": Task is null"));
  107     104      return;
  108     105    }
    …       …
  112     109        task.Pause();
  113     110        //we need to block the pause...
  114          -     pauseStopSem.WaitOne();
          111  +     if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
          112  +       throw new Exception("Pausing task " + this.TaskId + " timed out.");
          113  +     }
  115     114      }
  116     115      catch (Exception ex) {
  117          -     CurrentException = new Exception("Error pausing task " + this.TaskId + ": " + ex.ToString());
  118          -     executorQueue.AddMessage(ExecutorMessageType.ExceptionOccured);
          116  +     HandleStartStopPauseError(ex);
  119     117      }
  120     118    }
    …       …
  125     123    // wait until task is started. if this does not happen, the Task is null an we give up
  126     124    taskStartedSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
          125  + wasTaskAborted = true;
          126  +
  127     127    if (task == null) {
  128          -   CurrentException = new Exception("Stopping task " + this.TaskId + ": Task is null");
  129          -   executorQueue.AddMessage(ExecutorMessageType.ExceptionOccured);
  130          - }
  131          - wasTaskAborted = true;
          128  +   HandleStartStopPauseError(new Exception("Stopping task " + this.TaskId + ": Task is null"));
          129  +   return;
          130  + }
  132     131
  133     132    if ((ExecutionState == ExecutionState.Started) || (ExecutionState == ExecutionState.Paused)) {
  134     133      try {
  135     134        task.Stop();
  136          -     pauseStopSem.WaitOne();
          135  +     if (!pauseStopSem.WaitOne(Settings.Default.ExecutorSemTimeouts)) {
          136  +       throw new Exception("Stopping task " + this.TaskId + " timed out.");
          137  +     }
  137     138      }
  138     139      catch (Exception ex) {
  139          -     CurrentException = new Exception("Error stopping task " + this.TaskId + ": " + ex.ToString());
  140          -     executorQueue.AddMessage(ExecutorMessageType.ExceptionOccured);
          140  +     HandleStartStopPauseError(ex);
  141     141      }
  142     142    }
    …       …
  190     190      if (task != null && task.ExecutionState == ExecutionState.Started) {
  191     191        throw new InvalidStateException("Task is still running");
          192  +   }
          193  +
          194  +   TaskData taskData = null;
          195  +   if (task == null) {
          196  +     if (CurrentException == null) {
          197  +       CurrentException = new Exception("Task with id " + this.TaskId + " is null, sending empty task");
          198  +     }
  192     199      } else {
  193          -     TaskData taskData = new TaskData();
  194          -     if (task == null) {
  195          -       //send empty task and save exception
  196          -       taskData.Data = PersistenceUtil.Serialize(new TaskData());
  197          -       if (CurrentException == null) {
  198          -         CurrentException = new Exception("Task with id " + this.TaskId + " is null, sending empty task");
  199          -       }
  200          -     } else {
  201          -       taskData.Data = PersistenceUtil.Serialize(task);
  202          -     }
          200  +     taskData = new TaskData();
          201  +     taskData.Data = PersistenceUtil.Serialize(task);
  203     202        taskData.TaskId = TaskId;
  204          -     return taskData;
  205          -   }
          203  +   }
          204  +   return taskData;
  206     205    }
  207     206
    …       …
  211     210          task = null;
  212     211        }
          212  +
          213  +     private void HandleStartStopPauseError(Exception e) {
          214  +       taskDataInvalid = true;
          215  +       Task_TaskFailed(this, new EventArgs<Exception>(e));
          216  +     }
  213     217      }
  214     218    }
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/ExecutorMessage.cs
r9456  r11082
   30      30      TaskStopped,
   31      31      TaskFailed,
   32          -   StopExecutorMonitoringThread,
   33          -   ExceptionOccured
           32  +   StopExecutorMonitoringThread
   34      33    }
   35      34
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Manager/ConfigManager.cs
r9456  r11082
  117     117    st.JobsFetched = SlaveStatusInfo.TasksFetched;
  118     118    st.JobsFailed = SlaveStatusInfo.TasksFailed;
  119          - st.ExceptionsOccured = SlaveStatusInfo.ExceptionsOccured;
  120     119
  121     120    st.Jobs = jobManager.GetExecutionTimes().Select(x => new TaskStatus { TaskId = x.Key, ExecutionTime = x.Value }).ToList();
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Manager/HeartbeatManager.cs
r9456  r11082
   24      24    using System.Threading;
   25      25    using HeuristicLab.Clients.Hive.SlaveCore.Properties;
   26          - using HeuristicLab.Common;
   27      26
   28      27    namespace HeuristicLab.Clients.Hive.SlaveCore {
    …       …
  115     114    if (msgs == null) {
  116     115      SlaveClientCom.Instance.LogMessage("Error getting response from HB");
  117          -   OnExceptionOccured(new Exception("Error getting response from HB"));
  118     116    } else {
  119     117      SlaveClientCom.Instance.LogMessage("HB Response received (" + msgs.Count + "): ");
    …       …
  126     124    catch (Exception e) {
  127     125      SlaveClientCom.Instance.LogMessage("Heartbeat thread failed: " + e.ToString());
  128          -   OnExceptionOccured(e);
  129     126    }
  130     127    waitHandle.WaitOne(this.interval);
    …       …
  132     129          SlaveClientCom.Instance.LogMessage("Heartbeat thread stopped");
  133     130        }
  134          -
  135          -     #region Eventhandler
  136          -     public event EventHandler<EventArgs<Exception>> ExceptionOccured;
  137          -     private void OnExceptionOccured(Exception e) {
  138          -       var handler = ExceptionOccured;
  139          -       if (handler != null) handler(this, new EventArgs<Exception>(e));
  140          -     }
  141          -     #endregion
  142     131      }
  143     132    }
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/Manager/TaskManager.cs
r9456  r11082
   71      71    try {
   72      72      if (slaveTasks.ContainsKey(task.Id)) {
   73          -     SlaveStatusInfo.IncrementExceptionOccured();
           73  +     SlaveStatusInfo.IncrementTasksFailed();
   74      74        throw new TaskAlreadyRunningException(task.Id);
   75      75      } else {
    …       …
  185     185      slaveTask.TaskStopped += new EventHandler<EventArgs<Guid>>(slaveTask_TaskStopped);
  186     186      slaveTask.TaskFailed += new EventHandler<EventArgs<Guid, Exception>>(slaveTask_TaskFailed);
  187          -   slaveTask.ExceptionOccured += new EventHandler<EventArgs<Guid, Exception>>(slaveTask_ExceptionOccured);
  188     187    }
  189     188
    …       …
  193     192      slaveTask.TaskStopped -= new EventHandler<EventArgs<Guid>>(slaveTask_TaskStopped);
  194     193      slaveTask.TaskFailed -= new EventHandler<EventArgs<Guid, Exception>>(slaveTask_TaskFailed);
  195          -   slaveTask.ExceptionOccured -= new EventHandler<EventArgs<Guid, Exception>>(slaveTask_ExceptionOccured);
  196     194    }
  197     195
    …       …
  220     218    try {
  221     219      taskData = slaveTask.GetTaskData();
  222          -   if (taskData == null) throw new SerializationException();
  223     220      SlaveStatusInfo.IncrementTasksFinished();
  224     221      OnTaskPaused(slaveTask, taskData);
    …       …
  243     240    try {
  244     241      taskData = slaveTask.GetTaskData();
  245          -   if (taskData == null) throw new SerializationException();
  246     242      SlaveStatusInfo.IncrementTasksFinished();
  247     243      OnTaskStopped(slaveTask, taskData);
    …       …
  266     262      try {
  267     263        taskData = slaveTask.GetTaskData();
  268          -     if (taskData == null) throw new SerializationException();
  269     264      }
  270     265      catch { /* taskData will be null */ }
  271     266      SlaveStatusInfo.IncrementTasksFailed();
  272     267      OnTaskFailed(slaveTask, taskData, e.Value2);
  273          - }
  274          -
  275          - private void slaveTask_ExceptionOccured(object sender, EventArgs<Guid, Exception> e) {
  276          -   SlaveTask slaveTask;
  277          -   slaveTasksLocker.EnterUpgradeableReadLock();
  278          -   try {
  279          -     slaveTask = slaveTasks[e.Value];
  280          -     RemoveSlaveTask(e.Value, slaveTask);
  281          -   }
  282          -   finally { slaveTasksLocker.ExitUpgradeableReadLock(); }
  283          -
  284          -   SlaveStatusInfo.IncrementExceptionOccured();
  285          -   OnExceptionOccured(slaveTask, e.Value2);
  286     268    }
  287     269    #endregion
    …       …
  312     294    }
  313     295
  314          - public event EventHandler<EventArgs<SlaveTask, Exception>> ExceptionOccured;
  315          - private void OnExceptionOccured(SlaveTask slaveTask, Exception exception) {
  316          -   var handler = ExceptionOccured;
  317          -   if (handler != null) handler(this, new EventArgs<SlaveTask, Exception>(slaveTask, exception));
  318          - }
  319          -
  320     296    public event EventHandler<EventArgs<SlaveTask>> TaskAborted;
  321     297    private void OnTaskAborted(SlaveTask slaveTask) {
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/SlaveStatusInfo.cs
r9456  r11082
   31      31    private static int tasksAborted; // server sent abort
   32      32    private static int tasksFailed; // tasks that failed in the sandbox
   33          - private static int exceptionsOccured; // number jobs failed caused by the business logic, not a faulted task
   34      33    private static int usedCores; // number of cores currently used
   35      34
    …       …
   60      59    }
   61      60
   62          - public static int ExceptionsOccured {
   63          -   get { return exceptionsOccured; }
   64          - }
   65          -
   66      61    public static void IncrementTasksStarted() {
   67      62      Interlocked.Increment(ref tasksStarted);
    …       …
   84      79    }
   85      80
   86          - public static void IncrementExceptionOccured() {
   87          -   Interlocked.Increment(ref exceptionsOccured);
   88          - }
   89          -
   90      81    public static void IncrementUsedCores(int val) {
   91      82      Interlocked.Add(ref usedCores, val);
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/SlaveTask.cs
r9456  r11082
   43      43    public Guid TaskId { get; private set; }
   44      44    public bool IsPrepared { get; private set; }
           45  + private TaskData originalTaskData;
   45      46
   46      47    private int coresNeeded;
    …       …
   74      75    try {
   75      76      this.TaskId = task.Id;
           77  +   originalTaskData = taskData;
   76      78      Prepare(task);
   77      79      StartTaskInAppDomain(taskData);
    …       …
  143     145    while (repeat > 0) {
  144     146      try {
  145          -     waitForStartBeforeKillSem.WaitOne();
          147  +     waitForStartBeforeKillSem.WaitOne(Settings.Default.ExecutorSemTimeouts);
  146     148        AppDomain.Unload(appDomain);
  147     149        waitForStartBeforeKillSem.Dispose();
    …       …
  164     166    private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
  165     167      DisposeAppDomain();
  166          -   OnExceptionOccured(new Exception("Unhandled exception: " + e.ExceptionObject.ToString()));
          168  +   OnTaskFailed(new Exception("Unhandled exception: " + e.ExceptionObject.ToString()));
  167     169    }
  168     170
    …       …
  171     173    try {
  172     174      data = executor.GetTaskData();
          175  +   //this means that there was a problem executing the task
          176  +   if (data == null) return originalTaskData;
  173     177    }
  174     178    catch (Exception ex) {
    …       …
  233     237          executorMonitoringRun = false;
  234     238          break;
  235          -
  236          -     case ExecutorMessageType.ExceptionOccured:
  237          -       executorMonitoringRun = false;
  238          -       DisposeAppDomain();
  239          -       if (executor.CurrentException != null) {
  240          -         OnExceptionOccured(executor.CurrentException);
  241          -       } else {
  242          -         OnExceptionOccured(new Exception(string.Format("Unknow exception occured in Executor for task {0}", TaskId)));
  243          -       }
  244          -       break;
  245     239      }
  246     240    }
    …       …
  277     271          if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.TaskId, exception));
  278     272        }
  279          -
  280          -     public event EventHandler<EventArgs<Guid, Exception>> ExceptionOccured;
  281          -     private void OnExceptionOccured(Exception exception) {
  282          -       var handler = ExceptionOccured;
  283          -       if (handler != null) handler(this, new EventArgs<Guid, Exception>(this.TaskId, exception));
  284          -     }
  285     273      }
  286     274    }
trunk/sources/HeuristicLab.Clients.Hive.Slave/3.3/StatusCommons.cs
r9456  r11082
   51      51      public int JobsFailed { get; set; }
   52      52      [DataMember]
   53          -   public int ExceptionsOccured { get; set; }
   54          -   [DataMember]
   55      53      public List<TaskStatus> Jobs { get; set; }
   56      54      [DataMember]
    …       …
   58      56
   59      57      public override string ToString() {
   60          -     return string.Format("Status: {0}, Fetc,Start,Fin,Abor,Fail,Exc: {1},{2},{3},{4},{5},{6}",
   61          -       Status, JobsFetched, JobsStarted, JobsFinished, JobsAborted, JobsFailed, ExceptionsOccured);
           58  +     return string.Format("Status: {0}, Fetc,Start,Fin,Abor,Fail,Exc: {1},{2},{3},{4},{5}",
           59  +       Status, JobsFetched, JobsStarted, JobsFinished, JobsAborted, JobsFailed);
   62      60      }
   63      61    }
Note: See TracChangeset for help on using the changeset viewer.