Free cookie consent management tool by TermsFeed Policy Generator

Changeset 6216


Ignore:
Timestamp:
05/17/11 11:26:48 (12 years ago)
Author:
ascheibe
Message:

#1233

  • make UsedCores more reliable
  • some cosmetic fixes
Location:
branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/ConfigManager.cs

    r6203 r6216  
    6767    }
    6868
     69    public int GetFreeCores() {
     70      return slave.Cores.HasValue ? slave.Cores.Value - SlaveStatusInfo.UsedCores : 0;
     71    }
     72
    6973    /// <summary>
    7074    /// collects and returns information that gets displayed by the Client Console
     
    7983
    8084      st.TotalCores = slave.Cores.HasValue ? slave.Cores.Value : 0;
    81       st.FreeCores = slave.Cores.HasValue ? slave.Cores.Value - SlaveStatusInfo.UsedCores : 0;
     85      st.FreeCores = GetFreeCores();
    8286
    8387      st.JobsAborted = SlaveStatusInfo.JobsAborted;
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/Core.cs

    r6204 r6216  
    3737  /// </summary>
    3838  public class Core : MarshalByRefObject {
    39     public EventLog ServiceEventLog { get; set; }
    40 
    41     public static bool abortRequested { get; set; }
     39    private ISlaveCommunication clientCom;
     40    private ServiceHost slaveComm;
    4241    private Semaphore waitShutdownSem = new Semaphore(0, 1);
    43     public static ILog Log { get; set; }
    44 
    4542    private Dictionary<Guid, SlaveJob> slaveJobs = new Dictionary<Guid, SlaveJob>();
    46 
    4743    private WcfService wcfService;
    4844    private static HeartbeatManager heartbeatManager;
     45
    4946    public static HeartbeatManager HBManager { get { return heartbeatManager; } }
    50 
    51     private ISlaveCommunication clientCom;
    52     private ServiceHost slaveComm;
    53 
    54     public Dictionary<Guid, SlaveJob> SlaveJobs {
    55       get { return slaveJobs; }
    56     }
     47    public static ILog Log { get; set; }
     48    public EventLog ServiceEventLog { get; set; }
     49    public static bool abortRequested { get; set; }
     50    public Dictionary<Guid, SlaveJob> SlaveJobs { get { return slaveJobs; } }
    5751
    5852    public Core() { }
     
    8680        if (ServiceEventLog != null) {
    8781          try {
    88             ServiceEventLog.WriteEntry("Hive Slave threw exception: " + ex.ToString() + " with stack trace: " + ex.StackTrace);
     82            ServiceEventLog.WriteEntry(string.Format("Hive Slave threw exception: {0} with stack trace: {1}", ex.ToString(), ex.StackTrace));
    8983          }
    9084          catch (Exception) { }
     
    9286          //try to log with clientCom. If this works, the user sees at least a message;
    9387          //otherwise an exception will be thrown anyway.
    94           clientCom.LogMessage("Uncaught exception: " + ex.ToString() +
    95             Environment.NewLine + "Core is going to shutdown.");
     88          clientCom.LogMessage(string.Format("Uncaught exception: {0} {1} Core is going to shutdown.", ex.ToString(), Environment.NewLine));
    9689        }
    9790        ShutdownCore();
     
    133126
    134127    void WcfService_ExceptionOccured(object sender, EventArgs<Exception> e) {
    135       clientCom.LogMessage("Connection to server interruped with exception: " + e.Value.Message);
     128      clientCom.LogMessage(string.Format("Connection to server interruped with exception: {0}", e.Value.Message));
    136129    }
    137130
     
    145138    /// <param name="container">The container, containing the message</param>
    146139    private void DetermineAction(MessageContainer container) {
    147       clientCom.LogMessage("Message: " + container.Message.ToString() + " for job: " + container.JobId);
     140      clientCom.LogMessage(string.Format("Message: {0} for job: {1} ", container.Message.ToString(), container.JobId));
    148141
    149142      if (container is ExecutorMessageContainer<Guid>) {
     
    153146        switch (container.Message) {
    154147          case MessageContainer.MessageType.CalculateJob:
    155             Task.Factory.StartNew((jobIdObj) => {
    156               Guid jobId = (Guid)jobIdObj;
    157               SlaveJob newJob = new SlaveJob(this);
    158               bool start = true;
    159 
    160               lock (slaveJobs) {
    161                 if (slaveJobs.ContainsKey(jobId)) {
    162                   start = false;
    163                   clientCom.LogMessage(string.Format("Job with id {0} already exists. Start aborted.", jobId));
    164                 } else {
    165                   slaveJobs.Add(jobId, newJob);
    166                 }
    167               }
    168 
    169               if (start) {
    170                 newJob.CalculateJob(jobId);
    171               }
    172             }, container.JobId)
     148            Task.Factory.StartNew(HandleCalculateJob, container.JobId)
    173149            .ContinueWith((t) => {
    174150              // handle exception of task
     
    193169            SlaveStatusInfo.IncrementJobsAborted(); //TODO: move to a sane place
    194170
    195             Task.Factory.StartNew((jobIdObj) => {
    196               Guid jobId = (Guid)jobIdObj;
    197               bool abort = true;
    198               SlaveJob sj = null;
    199 
    200               lock (slaveJobs) {
    201                 if (!slaveJobs.ContainsKey(jobId)) {
    202                   clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Abort aborted.", jobId));
    203                   abort = false;
    204                 } else {
    205                   sj = slaveJobs[jobId];
    206                 }
    207               }
    208               if (abort && !sj.Finished) {
    209                 sj.KillAppDomain();
    210               }
    211             }, container.JobId)
     171            Task.Factory.StartNew(HandleAbortJob, container.JobId)
    212172             .ContinueWith((t) => {
    213                // handle exception of task
    214173               clientCom.LogMessage(t.Exception.ToString());
    215174             }, TaskContinuationOptions.OnlyOnFaulted);
    216175            break;
    217176          case MessageContainer.MessageType.StopJob:
    218             Task.Factory.StartNew((jobIdObj) => {
    219               Guid jobId = (Guid)jobIdObj;
    220               bool stop = true;
    221               SlaveJob sj = null;
    222 
    223               lock (slaveJobs) {
    224                 if (!slaveJobs.ContainsKey(jobId)) {
    225                   clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Stop aborted.", jobId));
    226                   stop = false;
    227                 } else {
    228                   sj = slaveJobs[jobId];
    229                 }
    230               }
    231               if (stop && !sj.Finished) {
    232                 sj.StopJob();
    233               }
    234             }, container.JobId)
     177            Task.Factory.StartNew(HandleStopJob, container.JobId)
    235178             .ContinueWith((t) => {
    236                // handle exception of task
    237179               clientCom.LogMessage(t.Exception.ToString());
    238180             }, TaskContinuationOptions.OnlyOnFaulted);
    239181            break;
    240182          case MessageContainer.MessageType.PauseJob:
    241             Task.Factory.StartNew((jobIdObj) => {
    242               Guid jobId = (Guid)jobIdObj;
    243               bool pause = true;
    244               SlaveJob sj = null;
    245 
    246               lock (slaveJobs) {
    247                 if (!slaveJobs.ContainsKey(jobId)) {
    248                   clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Pause aborted.", jobId));
    249                   pause = false;
    250                 } else {
    251                   sj = slaveJobs[jobId];
    252                 }
    253               }
    254               if (pause && !sj.Finished) {
    255                 sj.PauseJob();
    256               }
    257             }, container.JobId)
     183            Task.Factory.StartNew(HandlePauseJob, container.JobId)
    258184             .ContinueWith((t) => {
    259                // handle exception of task
    260185               clientCom.LogMessage(t.Exception.ToString());
    261186             }, TaskContinuationOptions.OnlyOnFaulted);
     
    273198      } else {
    274199        clientCom.LogMessage("Unknown MessageContainer: " + container);
     200      }
     201    }
     202
     203    private void HandleCalculateJob(object jobIdObj) {
     204      Guid jobId = (Guid)jobIdObj;
     205      SlaveJob newJob = new SlaveJob(this);
     206      bool start = true;
     207
     208      lock (slaveJobs) {
     209        if (ConfigManager.Instance.GetFreeCores() < 1) {
     210          wcfService.UpdateJobState(jobId, JobState.Waiting, "Slave set status waiting because no cores were available");
     211          clientCom.LogMessage(string.Format("Setting job with id {0} to waiting, all cores are used", jobId));
     212          start = false;
     213        } else {
     214          if (slaveJobs.ContainsKey(jobId)) {
     215            start = false;
     216            clientCom.LogMessage(string.Format("Job with id {0} already exists. Start aborted.", jobId));
     217          } else {
     218            slaveJobs.Add(jobId, newJob);
     219            newJob.PrepareJob(jobId);
     220          }
     221        }
     222      }
     223
     224      if (start) {
     225        newJob.CalculateJob();
     226      }
     227    }
     228
     229    private void HandleAbortJob(object jobIdObj) {
     230      Guid jobId = (Guid)jobIdObj;
     231      bool abort = true;
     232      SlaveJob sj = null;
     233
     234      lock (slaveJobs) {
     235        if (!slaveJobs.ContainsKey(jobId)) {
     236          clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Abort aborted.", jobId));
     237          abort = false;
     238        } else {
     239          sj = slaveJobs[jobId];
     240        }
     241      }
     242
     243      if (abort && !sj.Finished) {
     244        sj.KillAppDomain();
     245      }
     246    }
     247
     248    private void HandleStopJob(object jobIdObj) {
     249      Guid jobId = (Guid)jobIdObj;
     250      bool stop = true;
     251      SlaveJob sj = null;
     252
     253      lock (slaveJobs) {
     254        if (!slaveJobs.ContainsKey(jobId)) {
     255          clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Stop aborted.", jobId));
     256          stop = false;
     257        } else {
     258          sj = slaveJobs[jobId];
     259        }
     260      }
     261
     262      if (stop && !sj.Finished) {
     263        sj.StopJob();
     264      }
     265    }
     266
     267    private void HandlePauseJob(object jobIdObj) {
     268      Guid jobId = (Guid)jobIdObj;
     269      bool pause = true;
     270      SlaveJob sj = null;
     271
     272      lock (slaveJobs) {
     273        if (!slaveJobs.ContainsKey(jobId)) {
     274          clientCom.LogMessage(string.Format("Job with id {0} doesn't exist. Pause aborted.", jobId));
     275          pause = false;
     276        } else {
     277          sj = slaveJobs[jobId];
     278        }
     279      }
     280
     281      if (pause && !sj.Finished) {
     282        sj.PauseJob();
    275283      }
    276284    }
     
    372380    }
    373381
    374     /// <summary>
    375     /// Enqueues messages from the executor to the message queue.
    376     /// This is necessary if the core thread has to execute certain actions, e.g.
    377     /// killing of an app domain.
    378     /// </summary>   
    379     /// <returns>true if the calling method can continue execution, else false</returns>
    380     public void EnqueueExecutorMessage<T>(Action<T> action, T parameter) {
    381       ExecutorMessageContainer<T> container = new ExecutorMessageContainer<T>();
    382       container.Callback = action;
    383       container.CallbackParameter = parameter;
    384       MessageQueue.GetInstance().AddMessage(container);
    385     }
    386 
    387382    public void RemoveSlaveJobFromList(Guid jobId) {
    388383      lock (slaveJobs) {
  • branches/HeuristicLab.Hive-3.4/sources/HeuristicLab.Clients.Hive.Slave/3.4/SlaveJob.cs

    r6203 r6216  
    3434    }
    3535
    36     public void CalculateJob(Guid jobId) {
     36    public void PrepareJob(Guid jobId) {
     37      JobId = jobId;
    3738      Job job = wcfService.GetJob(jobId);
    3839      if (job == null) throw new JobNotFoundException(jobId);
    3940      coresNeeded = job.CoresNeeded;
    4041      SlaveStatusInfo.IncrementUsedCores(coresNeeded);
     42    }
     43
     44    public void CalculateJob() {
     45      Job job = wcfService.GetJob(JobId);
     46      if (job == null) throw new JobNotFoundException(JobId);
    4147
    4248      JobData jobData = wcfService.GetJobData(job.Id);
    43       if (jobData == null) throw new JobDataNotFoundException(jobId);
     49      if (jobData == null) throw new JobDataNotFoundException(JobId);
    4450      SlaveStatusInfo.IncrementJobsFetched();
    4551      job = wcfService.UpdateJobState(job.Id, JobState.Calculating, null);
    46       if (job == null) throw new JobNotFoundException(jobId);
     52      if (job == null) throw new JobNotFoundException(JobId);
    4753      StartJobInAppDomain(job, jobData);
    4854    }
     
    6672            SlaveStatusInfo.IncrementJobsAborted();
    6773
    68             clientCom.LogMessage("Sending the stopped job with id: " + job.Id);
     74            clientCom.LogMessage(string.Format("Sending the stopped job with id: {0}", job.Id));
    6975            wcfService.UpdateJobData(job, sJob, ConfigManager.Instance.GetClientInfo().Id, JobState.Aborted);
    7076          }
    7177          catch (Exception e) {
    72             clientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")");
     78            clientCom.LogMessage(string.Format("Transmitting the stopped job with id {0} to server failed. Exception is: {1}", job.Id, e.ToString()));
    7379          }
    7480          finally {
     
    101107          }
    102108          catch (Exception e) {
    103             clientCom.LogMessage("Transmitting to server failed. Storing the paused job with id: " + job.Id + " to hdd (" + e.ToString() + ")");
     109            clientCom.LogMessage(string.Format("Transmitting the paused job with id {0} to server failed. Exception is: {1}", job.Id, e.ToString()));
    104110          }
    105111          finally {
     
    138144    public void SendFinishedJob() {
    139145      try {
    140         clientCom.LogMessage("Getting the finished job with id: " + JobId);
     146        clientCom.LogMessage(string.Format("Getting the finished job with id: {0} ", JobId));
    141147        if (executor == null) {
    142148          clientCom.LogMessage(string.Format("SendFinishedJob: Can't pause job with id {0} with uninitialized executor", JobId));
     
    159165        try {
    160166          JobData sJob = executor.GetFinishedJob();
    161           clientCom.LogMessage("Sending the finished job with id: " + JobId);
     167          clientCom.LogMessage(string.Format("Sending the finished job with id: {0}", JobId));
    162168          wcfService.UpdateJobData(job, sJob, ConfigManager.Instance.GetClientInfo().Id, JobState.Finished);
    163169        }
    164170        catch (Exception e) {
    165           clientCom.LogMessage("Transmitting to server failed. Storing the finished job with id: " + JobId + " to hdd (" + e.ToString() + ")");
     171          clientCom.LogMessage(string.Format("Transmitting the job with id {0} to server failed. Exception is: {1}", job.Id, e.ToString()));
    166172        }
    167173        finally {
     
    171177      }
    172178      catch (Exception e) {
    173         clientCom.LogMessage("SendFinishedJob: The following exception has been thrown: " + e.ToString());
     179        clientCom.LogMessage(string.Format("SendFinishedJob: The following exception has been thrown: {0}", e.ToString()));
    174180      }
    175181    }
     
    181187      JobId = job.Id;
    182188
    183       clientCom.LogMessage("Received new job with id " + job.Id);
     189      clientCom.LogMessage(string.Format("Received new job with id {0}", job.Id));
    184190      clientCom.StatusChanged(ConfigManager.Instance.GetStatusForClientConsole());
    185191
     
    190196      try {
    191197        PluginCache.Instance.PreparePlugins(job, out configFileName);
    192         clientCom.LogMessage("Plugins fetched for job " + job.Id);
     198        clientCom.LogMessage(string.Format("Plugins fetched for job {0}", job.Id));
    193199        pluginsPrepared = true;
    194200      }
     
    214220          executor.CoresNeeded = job.CoresNeeded;
    215221          executor.MemoryNeeded = job.MemoryNeeded;
    216           clientCom.LogMessage("Starting Executor for job " + job.Id);
     222          clientCom.LogMessage(string.Format("Starting Executor for job {0}", job.Id));
    217223
    218224          executor.Start(jobData.Data);
     
    222228        }
    223229        catch (Exception exception) {
    224           clientCom.LogMessage("Creating the Appdomain and loading the job failed for job " + job.Id);
    225           clientCom.LogMessage("Error thrown is: " + exception.ToString());
     230          clientCom.LogMessage(string.Format("Creating the Appdomain and loading the job failed for job {0}", job.Id));
     231          clientCom.LogMessage(string.Format("Error thrown is: {0}", exception.ToString()));
    226232
    227233          if (executor != null && executor.CurrentException != string.Empty) {
     
    243249    /// <param name="JobId">the GUID of the job</param>   
    244250    public void KillAppDomain() {
    245       clientCom.LogMessage("Shutting down Appdomain for Job " + JobId);
     251      clientCom.LogMessage(string.Format("Shutting down Appdomain for Job {0}", JobId));
    246252
    247253      try {
     
    280286      }
    281287      catch (Exception ex) {
    282         clientCom.LogMessage("Exception when unloading the appdomain: " + ex.ToString());
     288        clientCom.LogMessage(string.Format("Exception when unloading the appdomain: {0}", ex.ToString()));
    283289      }
    284290      finally {
     
    291297
    292298    private void AppDomain_UnhandledException(object sender, UnhandledExceptionEventArgs e) {
    293       clientCom.LogMessage("Exception in AppDomain: " + e.ExceptionObject.ToString());
     299      clientCom.LogMessage(string.Format("Exception in AppDomain: ", e.ExceptionObject.ToString()));
    294300      KillAppDomain();
    295301    }
Note: See TracChangeset for help on using the changeset viewer.