Changeset 6112


Ignore:
Timestamp:
05/03/11 18:08:42 (10 years ago)
Author:
ascheibe
Message:

#1233

  • HeartbeatManager: don't sleep while starting jobs
  • Executor: make Start() blocking
  • shut down properly if an uncaught exception is thrown
Location:
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Core.cs

    r6110 r6112  
    100100          //try to log with clientCom. if this works the user sees at least a message,
    101101          //else an exception will be thrown anyways.
    102           clientCom.LogMessage("Error on startup: " + ex.ToString() +
     102          clientCom.LogMessage("Uncaught exception: " + ex.ToString() +
    103103            Environment.NewLine + "Core is going to shutdown.");
    104104        }
     105        ShutdownCore();
    105106      }
    106107      finally {
     
    177178              // handle exception of task
    178179              clientCom.LogMessage(t.Exception.ToString());
     180              wcfService.UpdateJobState(container.JobId, JobState.Failed, t.Exception.ToString());
     181              SlaveStatusInfo.JobsAborted++;
    179182            }, TaskContinuationOptions.OnlyOnFaulted);
    180183            break;
     
    474477        catch (Exception exception) {
    475478          clientCom.LogMessage(string.Format("Copying plugins for job {0} failed: {1}", myJob.Id, exception));
     479          wcfService.UpdateJobState(myJob.Id, JobState.Failed, exception.ToString());
     480          SlaveStatusInfo.JobsAborted++;
    476481          lock (executors) {
    477482            if (jobs.ContainsKey(myJob.Id)) {
     
    493498            executor.Core = this;
    494499            clientCom.LogMessage("Starting Executor for job " + myJob.Id);
    495 
    496500            executor.Start(jobData.Data);
    497            
     501
    498502            lock (executors) {
    499503              executors.Add(myJob.Id, executor);
     
    503507            clientCom.LogMessage("Creating the Appdomain and loading the job failed for job " + myJob.Id);
    504508            clientCom.LogMessage("Error thrown is: " + exception.ToString());
     509
     510            if (executors.ContainsKey(myJob.Id) && executors[myJob.Id].CurrentException != string.Empty) {
     511              wcfService.UpdateJobState(myJob.Id, JobState.Failed, executors[myJob.Id].CurrentException);
     512            } else {
     513              wcfService.UpdateJobState(myJob.Id, JobState.Failed, exception.ToString());
     514            }
     515            SlaveStatusInfo.JobsAborted++;
     516
    505517            KillAppDomain(myJob.Id);
    506518          }
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Executor.cs

    r6110 r6112  
    3737    public Core Core { get; set; }
    3838    private Semaphore pauseStopSem = new Semaphore(0, 1);
     39    private Semaphore startJobSem = new Semaphore(0, 1);
    3940
    4041    public bool SendHeartbeatForExecutor { get; set; }
     
    8687        } else {
    8788          Job.Start();
    88 
     89          startJobSem.WaitOne();
    8990        }
    9091      }
     
    140141      Job.DeleteChildJobs += new EventHandler(Job_DeleteChildJobs);
    141142      Job.JobPaused += new EventHandler(Job_JobPaused);
     143      Job.JobStarted += new EventHandler(Job_JobStarted);
    142144    }
    143145
     
    149151      Job.DeleteChildJobs -= new EventHandler(Job_DeleteChildJobs);
    150152      Job.JobPaused -= new EventHandler(Job_JobPaused);
     153      Job.JobStarted -= new EventHandler(Job_JobStarted);
    151154    }
    152155
     
    237240    private void Job_JobPaused(object sender, EventArgs e) {
    238241      pauseStopSem.Release();
     242    }
     243
     244    void Job_JobStarted(object sender, EventArgs e) {
     245      startJobSem.Release();
    239246    }
    240247
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/HeartbeatManager.cs

    r5826 r6112  
    3636    private WcfService wcfService;
    3737    private bool threadStopped;
    38     ReaderWriterLockSlim heartBeatThreadIsSleepingLock = new ReaderWriterLockSlim();
    3938
    4039    public HeartbeatManager() {
     
    7574
    7675    private void RunHeartBeatThread() {
    77       bool sleepForever;
    7876      while (!threadStopped) {
    79         sleepForever = false;
    8077        SlaveClientCom.Instance.ClientCom.StatusChanged(ConfigManager.Instance.GetStatusForClientConsole());
    8178
     
    108105                msgs.ForEach(mc => SlaveClientCom.Instance.ClientCom.LogMessage(mc.Message.ToString()));
    109106                msgs.ForEach(mc => MessageQueue.GetInstance().AddMessage(mc));
    110                 //after fetching a job, we sleep until the core wakes us up!!
    111                 msgs.ForEach(s => { if (s.Message == MessageContainer.MessageType.CalculateJob) sleepForever = true; });
    112107              }
    113108            }
     
    118113          OnExceptionOccured(e);
    119114        }
    120         if (sleepForever)
    121           waitHandle.WaitOne();
    122         else
    123           waitHandle.WaitOne(this.Interval);
     115        waitHandle.WaitOne(this.Interval);
    124116      }
    125117      waitHandle.Close();
Note: See TracChangeset for help on using the changeset viewer.