Changeset 5786
- Timestamp: 03/22/11 11:36:53 (14 years ago)
- Location: branches/HeuristicLab.Hive-3.4/sources
- Files: 4 added, 1 deleted, 14 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Core.cs
r5780 r5786 57 57 private int coreThreadId; 58 58 59 private ISlaveCommunication ClientCom;59 private ISlaveCommunication clientCom; 60 60 private ServiceHost slaveComm; 61 61 … … 84 84 slaveComm.Open(); 85 85 86 ClientCom = SlaveClientCom.Instance.ClientCom;87 ClientCom.LogMessage("Hive Slave started");86 clientCom = SlaveClientCom.Instance.ClientCom; 87 clientCom.LogMessage("Hive Slave started"); 88 88 89 89 ConfigManager manager = ConfigManager.Instance; … … 139 139 140 140 void WcfService_ExceptionOccured(object sender, EventArgs<Exception> e) { 141 ClientCom.LogMessage("Connection to server interruped with exception: " + e.Value.Message);141 clientCom.LogMessage("Connection to server interruped with exception: " + e.Value.Message); 142 142 } 143 143 144 144 void WcfService_Connected(object sender, EventArgs e) { 145 ClientCom.LogMessage("Connected successfully to Hive server");145 clientCom.LogMessage("Connected successfully to Hive server"); 146 146 } 147 147 … … 151 151 /// <param name="container">The Container, containing the message</param> 152 152 private void DetermineAction(MessageContainer container) { 153 ClientCom.LogMessage("Message: " + container.Message.ToString() + " for job: " + container.JobId);153 clientCom.LogMessage("Message: " + container.Message.ToString() + " for job: " + container.JobId); 154 154 155 155 if (container is ExecutorMessageContainer<Guid>) { … … 159 159 switch (container.Message) { 160 160 case MessageContainer.MessageType.CalculateJob: 161 Task.Factory.StartNew(() => { 162 Job job = wcfService.GetJob(container.JobId); 161 clientCom.LogMessage("Task.StartNew[0]: jobId: " + container.JobId); 162 Task.Factory.StartNew((jobIdObj) => { 163 Guid jobId = (Guid)jobIdObj; 164 clientCom.LogMessage("Task.StartNew[1]: jobId: " + jobId); 165 Job job = wcfService.GetJob(jobId); 166 if (job == null) throw new JobNotFoundException(jobId); 163 167 lock (engines) { 164 168 if (!jobs.ContainsKey(job.Id)) { … … 167 171 } 168 172 JobData 
jobData = wcfService.GetJobData(job.Id); 173 if (job == null) throw new JobDataNotFoundException(jobId); 169 174 job = wcfService.UpdateJobState(job.Id, JobState.Calculating, null); 170 175 StartJobInAppDomain(job, jobData); 171 }); 176 }, container.JobId) 177 .ContinueWith((t) => { 178 // handle exception of task 179 clientCom.LogMessage(t.Exception.ToString()); 180 }, TaskContinuationOptions.OnlyOnFaulted); 172 181 break; 173 182 case MessageContainer.MessageType.ShutdownSlave: … … 203 212 } 204 213 } else { 205 ClientCom.LogMessage("Unknown MessageContainer: " + container);214 clientCom.LogMessage("Unknown MessageContainer: " + container); 206 215 } 207 216 } … … 217 226 218 227 try { 219 ClientCom.LogMessage("Sending the paused job with id: " + job.Id);228 clientCom.LogMessage("Sending the paused job with id: " + job.Id); 220 229 wcfService.UpdateJobData(job, sJob, ConfigManager.Instance.GetClientInfo().Id, JobState.Paused); 221 230 SlaveStatusInfo.JobsProcessed++; //TODO: count or not count, thats the question 222 231 } 223 232 catch (Exception e) { 224 ClientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")");233 clientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")"); 225 234 } 226 235 finally { … … 240 249 241 250 try { 242 ClientCom.LogMessage("Sending the stoppped job with id: " + job.Id);251 clientCom.LogMessage("Sending the stoppped job with id: " + job.Id); 243 252 wcfService.UpdateJobData(job, sJob, ConfigManager.Instance.GetClientInfo().Id, JobState.Paused); 244 253 SlaveStatusInfo.JobsProcessed++; //TODO: count or not count, thats the question 245 254 } 246 255 catch (Exception e) { 247 ClientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")");256 clientCom.LogMessage("Transmitting to server failed. 
Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")"); 248 257 } 249 258 finally { … … 266 275 } 267 276 268 ClientCom.LogMessage("Aborted all jobs!");277 clientCom.LogMessage("Aborted all jobs!"); 269 278 } 270 279 … … 273 282 /// </summary> 274 283 private void DoPauseAll() { 275 ClientCom.LogMessage("Pause all received");284 clientCom.LogMessage("Pause all received"); 276 285 277 286 //copy guids because there will be removed items from 'Jobs' … … 290 299 /// </summary> 291 300 private void DoStopAll() { 292 ClientCom.LogMessage("Stop all received");301 clientCom.LogMessage("Stop all received"); 293 302 294 303 //copy guids because there will be removed items from 'Jobs' … … 316 325 /// </summary> 317 326 private void ShutdownCore() { 318 ClientCom.LogMessage("Shutdown Signal received");319 ClientCom.LogMessage("Stopping heartbeat");327 clientCom.LogMessage("Shutdown Signal received"); 328 clientCom.LogMessage("Stopping heartbeat"); 320 329 heartbeatManager.StopHeartBeat(); 321 330 abortRequested = true; 322 ClientCom.LogMessage("Logging out");331 clientCom.LogMessage("Logging out"); 323 332 324 333 325 334 lock (engines) { 326 ClientCom.LogMessage("engines locked");335 clientCom.LogMessage("engines locked"); 327 336 foreach (KeyValuePair<Guid, AppDomain> kvp in appDomains) { 328 ClientCom.LogMessage("Shutting down Appdomain for " + kvp.Key);337 clientCom.LogMessage("Shutting down Appdomain for " + kvp.Key); 329 338 appDomains[kvp.Key].UnhandledException -= new UnhandledExceptionEventHandler(AppDomain_UnhandledException); 330 339 AppDomain.Unload(kvp.Value); … … 332 341 } 333 342 WcfService.Instance.Disconnect(); 334 ClientCom.Shutdown();343 clientCom.Shutdown(); 335 344 SlaveClientCom.Close(); 336 345 … … 344 353 /// </summary> 345 354 private void DoStartSlave() { 346 ClientCom.LogMessage("Restart received");355 clientCom.LogMessage("Restart received"); 347 356 StartHeartbeats(); 348 ClientCom.LogMessage("Restart done");357 
clientCom.LogMessage("Restart done"); 349 358 } 350 359 … … 355 364 //TODO: do we need an AbortSleep? 356 365 private void Sleep() { 357 ClientCom.LogMessage("Sleep received");366 clientCom.LogMessage("Sleep received"); 358 367 heartbeatManager.StopHeartBeat(); 359 368 heartbeatManager = null; 360 369 DoStopAll(); 361 370 WcfService.Instance.Disconnect(); 362 ClientCom.LogMessage("Sleep done");371 clientCom.LogMessage("Sleep done"); 363 372 } 364 373 … … 371 380 public void PauseWaitJob(JobData data) { 372 381 if (!Jobs.ContainsKey(data.JobId)) { 373 ClientCom.LogMessage("Can't find job with id " + data.JobId);382 clientCom.LogMessage("Can't find job with id " + data.JobId); 374 383 } else { 375 384 Job job = Jobs[data.JobId]; … … 388 397 public void SendFinishedJob(Guid jobId) { 389 398 try { 390 ClientCom.LogMessage("Getting the finished job with id: " + jobId);399 clientCom.LogMessage("Getting the finished job with id: " + jobId); 391 400 if (!engines.ContainsKey(jobId)) { 392 ClientCom.LogMessage("Engine doesn't exist");401 clientCom.LogMessage("Engine doesn't exist"); 393 402 return; 394 403 } 395 404 if (!jobs.ContainsKey(jobId)) { 396 ClientCom.LogMessage("Job doesn't exist");405 clientCom.LogMessage("Job doesn't exist"); 397 406 return; 398 407 } … … 405 414 406 415 try { 407 ClientCom.LogMessage("Sending the finished job with id: " + jobId);416 clientCom.LogMessage("Sending the finished job with id: " + jobId); 408 417 wcfService.UpdateJobData(cJob, sJob, ConfigManager.Instance.GetClientInfo().Id, JobState.Finished); 409 418 SlaveStatusInfo.JobsProcessed++; 410 419 } 411 420 catch (Exception e) { 412 ClientCom.LogMessage("Transmitting to server failed. Storing the finished job with id: " + jobId + " to hdd (" + e.ToString() + ")");421 clientCom.LogMessage("Transmitting to server failed. 
Storing the finished job with id: " + jobId + " to hdd (" + e.ToString() + ")"); 413 422 } 414 423 finally { … … 428 437 /// <param name="e"></param> 429 438 private void StartJobInAppDomain(Job myJob, JobData jobData) { 430 ClientCom.LogMessage("Received new job with id " + myJob.Id);439 clientCom.LogMessage("Received new job with id " + myJob.Id); 431 440 String pluginDir = Path.Combine(PluginCache.Instance.PluginTempBaseDir, myJob.Id.ToString()); 432 441 bool pluginsPrepared = false; … … 435 444 try { 436 445 PluginCache.Instance.PreparePlugins(myJob, out configFileName); 437 ClientCom.LogMessage("Plugins fetched for job " + myJob.Id);446 clientCom.LogMessage("Plugins fetched for job " + myJob.Id); 438 447 pluginsPrepared = true; 439 448 } 440 449 catch (Exception exception) { 441 ClientCom.LogMessage(string.Format("Copying plugins for job {0} failed: {1}", myJob.Id, exception));450 clientCom.LogMessage(string.Format("Copying plugins for job {0} failed: {1}", myJob.Id, exception)); 442 451 } 443 452 … … 448 457 lock (engines) { 449 458 appDomains.Add(myJob.Id, appDomain); 450 ClientCom.LogMessage("Creating AppDomain");459 clientCom.LogMessage("Creating AppDomain"); 451 460 Executor engine = (Executor)appDomain.CreateInstanceAndUnwrap(typeof(Executor).Assembly.GetName().Name, typeof(Executor).FullName); 452 ClientCom.LogMessage("Created AppDomain");461 clientCom.LogMessage("Created AppDomain"); 453 462 engine.JobId = myJob.Id; 454 463 engine.Core = this; 455 ClientCom.LogMessage("Starting Engine for job " + myJob.Id);464 clientCom.LogMessage("Starting Engine for job " + myJob.Id); 456 465 engines.Add(myJob.Id, engine); 457 466 engine.Start(jobData.Data); 458 467 SlaveStatusInfo.JobsFetched++; 459 ClientCom.LogMessage("Increment FetchedJobs to:" + SlaveStatusInfo.JobsFetched);468 clientCom.LogMessage("Increment FetchedJobs to:" + SlaveStatusInfo.JobsFetched); 460 469 } 461 470 } 462 471 catch (Exception exception) { 463 ClientCom.LogMessage("Creating the Appdomain 
and loading the job failed for job " + myJob.Id);464 ClientCom.LogMessage("Error thrown is: " + exception.ToString());472 clientCom.LogMessage("Creating the Appdomain and loading the job failed for job " + myJob.Id); 473 clientCom.LogMessage("Error thrown is: " + exception.ToString()); 465 474 KillAppDomain(myJob.Id); 466 475 } … … 471 480 public event EventHandler<EventArgs<Exception>> ExceptionOccured; 472 481 private void OnExceptionOccured(Exception e) { 473 ClientCom.LogMessage("Error: " + e.ToString());482 clientCom.LogMessage("Error: " + e.ToString()); 474 483 var handler = ExceptionOccured; 475 484 if (handler != null) handler(this, new EventArgs<Exception>(e)); … … 477 486 478 487 private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) { 479 ClientCom.LogMessage("Exception in AppDomain: " + e.ExceptionObject.ToString());488 clientCom.LogMessage("Exception in AppDomain: " + e.ExceptionObject.ToString()); 480 489 KillAppDomain(new Guid(e.ExceptionObject.ToString())); 481 490 } … … 508 517 } 509 518 510 ClientCom.LogMessage("Shutting down Appdomain for Job " + id);519 clientCom.LogMessage("Shutting down Appdomain for Job " + id); 511 520 lock (engines) { 512 521 try { … … 526 535 } 527 536 catch (CannotUnloadAppDomainException) { 528 ClientCom.LogMessage("Could not unload AppDomain, will try again in 1 sec.");537 clientCom.LogMessage("Could not unload AppDomain, will try again in 1 sec."); 529 538 Thread.Sleep(1000); 530 539 repeat--; … … 542 551 } 543 552 catch (Exception ex) { 544 ClientCom.LogMessage("Exception when unloading the appdomain: " + ex.ToString());553 clientCom.LogMessage("Exception when unloading the appdomain: " + ex.ToString()); 545 554 } 546 555 } -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/HeuristicLab.Clients.Hive.Slave-3.4.csproj
r5721 r5786 98 98 <ItemGroup> 99 99 <Compile Include="ConfigManager.cs" /> 100 <Compile Include="Exceptions\JobNotFoundException.cs" /> 101 <Compile Include="Exceptions\JobNotDataFoundException.cs" /> 100 102 <Compile Include="SlaveClientCom.cs" /> 101 103 <Compile Include="Core.cs" /> … … 103 105 <Compile Include="Executor.cs" /> 104 106 <Compile Include="HeartbeatManager.cs" /> 105 <Compile Include=" InvalidStateException.cs" />107 <Compile Include="Exceptions\InvalidStateException.cs" /> 106 108 <Compile Include="JobStatus.cs" /> 107 109 <Compile Include="MessageQueue.cs" /> -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive/3.4/ExperimentManager/HiveExperimentClient.cs
r5718 r5786 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Configuration; 25 using System.IO; 24 26 using System.Linq; 25 27 using System.Threading; 26 28 using HeuristicLab.Clients.Hive.Jobs; 29 using HeuristicLab.Collections; 27 30 using HeuristicLab.Common; 28 31 using HeuristicLab.Core; 29 32 using HeuristicLab.Optimization; 33 using HeuristicLab.PluginInfrastructure; 30 34 31 35 namespace HeuristicLab.Clients.Hive { 32 using System.Configuration;33 using System.IO;34 using HeuristicLab.Collections;35 using HeuristicLab.PluginInfrastructure;36 37 36 /// <summary> 38 37 /// An experiment which contains multiple batch runs of algorithms. … … 378 377 379 378 #region HiveJob Events 380 void HiveJob_JobStateChanged(object sender, EventArgs e) {379 private void HiveJob_JobStateChanged(object sender, EventArgs e) { 381 380 if (HiveJob != null) { 382 381 rootJobId = HiveJob.Job.Id; … … 520 519 HiveJob hj = hiveJob.GetHiveJobByJobId(lightweightJob.Id); 521 520 if (hj != null) { 521 DateTime lastJobDataUpdate = hj.Job.LastJobDataUpdate; 522 522 hj.UpdateFromLightweightJob(lightweightJob); 523 if ((hj.Job.State == JobState.Aborted || 524 hj.Job.State == JobState.Failed || 525 hj.Job.State == JobState.Finished) && 526 !hj.IsFinishedOptimizerDownloaded) { 523 524 // lastJobDataUpdate equals DateTime.MinValue right after it was uploaded. When the first results are polled, this value is updated 525 if (lastJobDataUpdate != DateTime.MinValue && lastJobDataUpdate < hj.Job.LastJobDataUpdate) { 527 526 LogMessage(hj.Job.Id, "Downloading optimizer for job"); 528 527 OptimizerJob optimizerJob = LoadOptimizerJob(hj.Job.Id); 529 528 if (optimizerJob == null) { 530 529 // something bad happened to this job. 
set to finished to allow the rest beeing downloaded 531 hj.IsFinishedOptimizerDownloaded = true;530 //hj.IsFinishedOptimizerDownloaded = true; 532 531 } else { 533 if (lightweightJob.ParentJobId.HasValue) {534 HiveJob parentHiveJob = HiveJob.GetHiveJobByJobId(lightweightJob.ParentJobId.Value);535 parentHiveJob.UpdateChildOptimizer(optimizerJob, hj.Job.Id);532 // if the job is paused, download but don't integrate into parent optimizer (to avoid Prepare) 533 if (hj.Job.State == JobState.Paused) { 534 536 535 } else { 537 this.HiveJob.IsFinishedOptimizerDownloaded = true; 536 if (lightweightJob.ParentJobId.HasValue) { 537 HiveJob parentHiveJob = HiveJob.GetHiveJobByJobId(lightweightJob.ParentJobId.Value); 538 parentHiveJob.UpdateChildOptimizer(optimizerJob, hj.Job.Id); 539 } else { 540 //this.HiveJob.IsFinishedOptimizerDownloaded = true; 541 } 538 542 } 539 543 } … … 550 554 551 555 private bool AllJobsFinished() { 552 return HiveJob.GetAllHiveJobs().All(hj => hj.IsFinishedOptimizerDownloaded); 556 //return HiveJob.GetAllHiveJobs().All(hj => hj.IsFinishedOptimizerDownloaded); 557 return HiveJob.GetAllHiveJobs().All(j => j.Job.State == JobState.Finished 558 || j.Job.State == JobState.Aborted 559 || j.Job.State == JobState.Failed); 553 560 } 554 561 -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive/3.4/ExperimentManager/HiveJobClient.cs
r5779 r5786 77 77 DergisterOptimizerEvents(); 78 78 optimizerJob = value; 79 if (optimizerJob.ExecutionState == ExecutionState.Stopped) {80 IsFinishedOptimizerDownloaded = true;81 }79 //if (optimizerJob.ExecutionState == ExecutionState.Stopped) { 80 // IsFinishedOptimizerDownloaded = true; 81 //} 82 82 RegisterOptimizerEvents(); 83 83 OnOptimizerJobChanged(); … … 91 91 } 92 92 93 private bool isFinishedOptimizerDownloaded;94 public bool IsFinishedOptimizerDownloaded {95 get { return isFinishedOptimizerDownloaded; }96 set {97 if (isFinishedOptimizerDownloaded != value) {98 isFinishedOptimizerDownloaded = value;99 OnIsFinishedOptimizerDownloadedChanged();100 }101 }102 }93 //private bool isFinishedOptimizerDownloaded; 94 //public bool IsFinishedOptimizerDownloaded { 95 // get { return isFinishedOptimizerDownloaded; } 96 // set { 97 // if (isFinishedOptimizerDownloaded != value) { 98 // isFinishedOptimizerDownloaded = value; 99 // OnIsFinishedOptimizerDownloadedChanged(); 100 // } 101 // } 102 //} 103 103 104 104 private bool syncJobsWithOptimizers = true; … … 331 331 } 332 332 if (childIsFinishedOptimizerDownloaded) { 333 child.IsFinishedOptimizerDownloaded = true;333 //child.IsFinishedOptimizerDownloaded = true; // todo: clean up with childIsFinishedOptimizerDownloaded 334 334 } 335 335 syncJobsWithOptimizers = true; … … 345 345 } 346 346 foreach (IRun run in optimizerJob.Optimizer.Runs) { 347 if (!batchRun.Runs.Contains(run)) 347 if (!batchRun.Runs.Contains(run)) { 348 run.Name = GetNewRunName(run, batchRun.Runs); 348 349 batchRun.Runs.Add(run); 350 } 351 } 352 } 353 354 /// <summary> 355 /// Parses the run numbers out of runs and renames the run to the next number 356 /// </summary> 357 private static string GetNewRunName(IRun run, RunCollection runs) { 358 int idx = run.Name.IndexOf("Run ") + 4; 359 360 if (idx == -1 || runs.Count == 0) 361 return run.Name; 362 363 int maxRunNumber = int.MinValue; 364 foreach (IRun r in runs) { 365 int number = 
GetRunNumber(r.Name); 366 maxRunNumber = Math.Max(maxRunNumber, number); 367 } 368 369 return run.Name.Substring(0, idx) + (maxRunNumber + 1).ToString(); 370 } 371 372 /// <summary> 373 /// Parses the number of a Run out of its name. Example "Genetic Algorithm Run 3" -> 3 374 /// </summary> 375 private static int GetRunNumber(string runName) { 376 int idx = runName.IndexOf("Run ") + 4; 377 if (idx == -1) { 378 return 0; 379 } else { 380 return int.Parse(runName.Substring(idx, runName.Length - idx)); 349 381 } 350 382 } … … 403 435 if (lightweightJob != null) { 404 436 job.Id = lightweightJob.Id; 405 job. Id = lightweightJob.Id;437 job.ParentJobId = lightweightJob.ParentJobId; 406 438 job.ExecutionTime = lightweightJob.ExecutionTime; 407 439 job.State = lightweightJob.State; 408 440 job.StateLog = new List<StateLog>(lightweightJob.StateLog); 409 // what about parentJob 441 job.Command = lightweightJob.Command; 442 job.LastJobDataUpdate = lightweightJob.LastJobDataUpdate; 443 410 444 OnJobStateChanged(); 411 445 OnToStringChanged(); -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive/3.4/ServiceClients/HiveServiceClient.cs
r5779 r5786 94 94 95 95 [System.Runtime.Serialization.OptionalFieldAttribute()] 96 private System.DateTime LastJobDataUpdateField; 97 98 [System.Runtime.Serialization.OptionalFieldAttribute()] 96 99 private System.Nullable<System.Guid> ParentJobIdField; 97 100 … … 132 135 this.ExecutionTimeField = value; 133 136 this.RaisePropertyChanged("ExecutionTime"); 137 } 138 } 139 } 140 141 [System.Runtime.Serialization.DataMemberAttribute()] 142 public System.DateTime LastJobDataUpdate 143 { 144 get 145 { 146 return this.LastJobDataUpdateField; 147 } 148 set 149 { 150 if ((this.LastJobDataUpdateField.Equals(value) != true)) 151 { 152 this.LastJobDataUpdateField = value; 153 this.RaisePropertyChanged("LastJobDataUpdate"); 134 154 } 135 155 } -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive/3.4/ServiceClients/LightweightJob.cs
r5779 r5786 43 43 this.State = job.State; 44 44 this.Command = job.Command; 45 this.LastJobDataUpdate = job.LastJobDataUpdate; 45 46 } 46 47 … … 52 53 this.State = original.State; 53 54 this.Command = original.Command; 55 this.LastJobDataUpdate = original.LastJobDataUpdate; 54 56 } 55 57 public override IDeepCloneable Clone(Cloner cloner) { -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.Common/3.4/ApplicationConstants.cs
r5779 r5786 33 33 public static System.Transactions.IsolationLevel IsolationLevelScope = System.Transactions.IsolationLevel.ReadUncommitted; 34 34 35 public static int HeartbeatTimeout = 120; // value in seconds 35 public static TimeSpan SlaveHeartbeatTimeout = TimeSpan.FromMinutes(1); 36 37 public static TimeSpan CalculatingJobHeartbeatTimeout = TimeSpan.FromMinutes(1); 38 39 public static TimeSpan TransferringJobHeartbeatTimeout = TimeSpan.FromMinutes(5); 36 40 37 41 /// <summary> 38 42 /// Interval in which the HL.HiveExperiment will poll results from server 39 43 /// </summary> 40 public static TimeSpan ResultPollingInterval = new TimeSpan(0, 0,5);44 public static TimeSpan ResultPollingInterval = TimeSpan.FromSeconds(5); 41 45 42 46 /// <summary> -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.Common/3.4/DataTransfer/LightweightJob.cs
r5779 r5786 39 39 [DataMember] 40 40 public Command? Command { get; set; } 41 [DataMember] 42 public DateTime LastJobDataUpdate { get; set; } 41 43 42 44 public StateLog CurrentStateLog { get { return StateLog.LastOrDefault(); } } … … 55 57 this.State = job.State; 56 58 this.Command = job.Command; 59 this.LastJobDataUpdate = job.LastJobDataUpdate; 57 60 } 58 61 } -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.DataAccess/3.4/Convert.cs
r5779 r5786 43 43 IsParentJob = source.IsParentJob, 44 44 FinishWhenChildJobsFinished = source.FinishWhenChildJobsFinished, 45 Command = source.Command 45 Command = source.Command, 46 LastJobDataUpdate = source.JobData.LastUpdate 46 47 }; 47 48 } -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.DataAccess/3.4/HiveDao.cs
r5779 r5786 35 35 } 36 36 37 public HiveDao() { 38 } 37 public HiveDao() { } 39 38 40 39 #region Job Methods -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive.Tests/DaoTests.cs
r5779 r5786 32 32 job1.Command = Command.Pause; 33 33 34 DT.JobData jobData1 = new DT.JobData(); 35 jobData1.Data = new byte[] { 0, 1, 2, 3, 4, 5 }; 36 jobData1.LastUpdate = DateTime.Now; 37 34 38 DT.Plugin plugin1 = new DT.Plugin(); 35 39 plugin1.Name = "Tests.MyPlugin"; … … 51 55 52 56 job1.Id = dao.AddJob(job1); 57 jobData1.JobId = job1.Id; 58 dao.AddJobData(jobData1); 53 59 54 60 DT.Job job1loaded = dao.GetJob(job1.Id); … … 60 66 Assert.AreEqual(job1.StateLog.Count, job1loaded.StateLog.Count); 61 67 Assert.AreEqual(job1.Command, job1loaded.Command); 68 Assert.IsTrue(Math.Abs((job1loaded.LastJobDataUpdate - jobData1.LastUpdate).TotalSeconds) < 1); 62 69 for (int i = 0; i < job1.StateLog.Count; i++) { 63 70 Assert.AreEqual(job1.Id, job1loaded.StateLog[i].JobId); … … 88 95 } 89 96 97 DT.JobData jobData1Loaded = dao.GetJobData(job1.Id); 98 Assert.AreEqual(jobData1.JobId, jobData1Loaded.JobId); 99 Assert.IsTrue(Math.Abs((jobData1.LastUpdate - jobData1Loaded.LastUpdate).TotalSeconds) < 1); 100 Assert.IsTrue(jobData1.Data.SequenceEqual(jobData1Loaded.Data)); 101 90 102 dao.DeleteJob(job1.Id); 91 103 92 104 Assert.AreEqual(null, dao.GetJob(job1.Id)); 105 Assert.AreEqual(null, dao.GetJobData(job1.Id)); 93 106 } 94 107 -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive/3.4/HeartbeatManager.cs
r5779 r5786 52 52 53 53 private void AssignJob(Slave slave, Job job) { 54 dao.UpdateJobState(job.Id, JobState.Transferring, slave.Id, null, null);54 job = dao.UpdateJobState(job.Id, JobState.Transferring, slave.Id, null, null); 55 55 dao.UpdateSlave(slave); 56 57 // from now on the job has some time to send the next heartbeat (ApplicationConstants.TransferringJobHeartbeatTimeout) 58 job.LastHeartbeat = DateTime.Now; 59 dao.UpdateJob(job); 56 60 } 57 61 -
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Services.Hive/3.4/LifecycleManager.cs
r5779 r5786 25 25 log.Log("LifecycleManager.Cleanup()"); 26 26 SetTimeoutSlavesOffline(); 27 FinishParentJobs(); 27 SetTimeoutJobsWaiting(); 28 FinishParentJobs(); 28 29 } 29 30 … … 34 35 var slaves = dao.GetSlaves(x => x.SlaveState != SlaveState.Offline); 35 36 foreach (Slave slave in slaves) { 36 if (!slave.LastHeartbeat.HasValue || (DateTime.Now - slave.LastHeartbeat.Value) .TotalSeconds > ApplicationConstants.HeartbeatTimeout) {37 if (!slave.LastHeartbeat.HasValue || (DateTime.Now - slave.LastHeartbeat.Value) > ApplicationConstants.SlaveHeartbeatTimeout) { 37 38 slave.SlaveState = SlaveState.Offline; 38 39 SetJobsWaiting(slave.Id); … … 55 56 var jobs = dao.GetJobs(x => x.State == JobState.Calculating).Where(x => x.StateLog.Last().SlaveId == slaveId); 56 57 foreach (var j in jobs) { 57 Job job = dao.UpdateJobState(j.Id, JobState.Waiting, slaveId, null, "Slave timed out ");58 Job job = dao.UpdateJobState(j.Id, JobState.Waiting, slaveId, null, "Slave timed out."); 58 59 job.Command = null; 59 60 dao.UpdateJob(job); … … 61 62 } 62 63 64 /// <summary> 65 /// Looks for jobs which have not sent heartbeats for some time and reschedules them for calculation 66 /// </summary> 67 private void SetTimeoutJobsWaiting() { 68 var jobs = dao.GetJobs(x => (x.State == JobState.Calculating && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.CalculatingJobHeartbeatTimeout) 69 || (x.State == JobState.Transferring && (DateTime.Now - x.LastHeartbeat) > ApplicationConstants.TransferringJobHeartbeatTimeout)); 70 foreach (var j in jobs) { 71 Job job = dao.UpdateJobState(j.Id, JobState.Waiting, null, null, "Slave timed out."); 72 job.Command = null; 73 dao.UpdateJob(job); 74 } 75 } 63 76 } 64 77 } -
branches/HeuristicLab.Hive-3.4/sources/MergeConfigs.cmd
r5095 r5786 1 1 copy "%SolutionDir%HeuristicLab.Hive 3.4.dll.config" "%TargetDir%" 2 2 3 %SolutionDir%ConfigMerger "%SolutionDir%HeuristicLab.Services.Hive\3.4\app.config" "%TargetDir%HeuristicLab.Hive 3.4.dll.config"4 3 %SolutionDir%ConfigMerger "%SolutionDir%HeuristicLab.Services.Hive.DataAccess\3.4\app.config" "%TargetDir%HeuristicLab.Hive 3.4.dll.config" 5 4 %SolutionDir%ConfigMerger "%SolutionDir%HeuristicLab.Clients.Hive\3.4\app.config" "%TargetDir%HeuristicLab.Hive 3.4.dll.config"
Note: See TracChangeset for help on using the changeset viewer.