[6984] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
| 3 | * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
| 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Linq;
|
---|
| 25 | using System.Threading;
|
---|
| 26 | using System.Threading.Tasks;
|
---|
| 27 | using HeuristicLab.Clients.Hive;
|
---|
| 28 | using HeuristicLab.Common;
|
---|
| 29 | using HeuristicLab.Core;
|
---|
| 30 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 31 |
|
---|
| 32 | namespace HeuristicLab.HiveEngine {
|
---|
/// <summary>
/// Represents an engine that executes operations which can be executed in parallel on the hive
/// </summary>
/// <remarks>
/// Operation collections flagged as <c>Parallel</c> are wrapped into engine tasks, submitted to the
/// hive as one job and awaited by polling. Every other operation is executed locally, one at a time.
/// </remarks>
[StorableClass]
[Item("Hive Engine", "Engine for parallel execution on the hive. You need enable `Parallel` for at least one operator in your operator graph to have all childoperations parallelized. Also those childoperations must not have sideeffects on a higher scope.")]
public class HiveEngine : Engine {
  // Static lock taken in CreateJob; being static it is shared by all engine instances.
  private static readonly object locker = new object();
  // Token of the current run; set by Run(CancellationToken) and read by the execution loop.
  private CancellationToken cancellationToken;
  // True until the execution loop has run once on this instance (neither stored nor cloned).
  private bool firstRun = true;

  [Storable]
  private IOperator currentOperator;

  /// <summary>
  /// Names of the hive resources the created jobs are assigned to. The value is copied to each
  /// created job unchanged; <see cref="GetResourceIds"/> treats it as a ';'-separated list.
  /// </summary>
  [Storable]
  public string ResourceNames { get; set; }

  [Storable]
  private int priority;
  /// <summary>
  /// Priority that is assigned to every task uploaded by this engine.
  /// </summary>
  public int Priority {
    get { return priority; }
    set { priority = value; }
  }

  [Storable]
  private TimeSpan executionTimeOnHive;
  /// <summary>
  /// Sum of the execution times of the tasks of the job that is currently polled. It is reset to
  /// zero when the engine is prepared. <see cref="ExecutionTimeOnHiveChanged"/> is raised only
  /// when the value actually changes.
  /// </summary>
  public TimeSpan ExecutionTimeOnHive {
    get { return executionTimeOnHive; }
    set {
      if (value != executionTimeOnHive) {
        executionTimeOnHive = value;
        OnExecutionTimeOnHiveChanged();
      }
    }
  }

  [Storable]
  private bool isPrivileged;
  /// <summary>
  /// Value that is copied to the <c>IsPrivileged</c> flag of every job created by this engine.
  /// </summary>
  public bool IsPrivileged {
    get { return isPrivileged; }
    set { isPrivileged = value; }
  }

  // Task can't be storable, so RefreshableHiveExperiment can't be stored. But as previous runs are only informative it does not matter (only execution time on hive will be wrong because of that -> Todo)
  private ItemCollection<RefreshableJob> jobs = new ItemCollection<RefreshableJob>();
  /// <summary>
  /// Jobs created by this engine instance. The collection is neither stored nor cloned.
  /// </summary>
  public ItemCollection<RefreshableJob> Jobs {
    get { return jobs; }
    set { jobs = value; }
  }

  private List<Plugin> onlinePlugins;
  /// <summary>
  /// Plugins reported by the hive service that have a non-null hash. The list is fetched once,
  /// when the execution loop runs for the first time; it is <c>null</c> before that.
  /// </summary>
  public List<Plugin> OnlinePlugins {
    get { return onlinePlugins; }
    set { onlinePlugins = value; }
  }

  private List<Plugin> alreadyUploadedPlugins;
  /// <summary>
  /// Initialized to an empty list when the execution loop runs for the first time; it is
  /// <c>null</c> before that. This class does not add entries itself.
  /// </summary>
  public List<Plugin> AlreadyUploadedPlugins {
    get { return alreadyUploadedPlugins; }
    set { alreadyUploadedPlugins = value; }
  }

  /// <summary>
  /// Result of the hive service's <c>IsAllowedPrivileged</c> call made by the default constructor.
  /// NOTE(review): the value is neither stored nor copied by the cloning constructor, so
  /// deserialized and cloned engines report <c>false</c> - confirm that this is intended.
  /// </summary>
  public bool IsAllowedPrivileged { get; set; }

  #region constructors and cloning
  /// <summary>
  /// Creates an engine that targets the resource "HEAL" with priority 0 and a thread-safe log.
  /// The constructor calls the hive service to determine <see cref="IsAllowedPrivileged"/>.
  /// </summary>
  public HiveEngine() {
    this.ResourceNames = "HEAL";
    this.Priority = 0;
    this.log = new ThreadSafeLog();
    this.IsAllowedPrivileged = HiveServiceLocator.Instance.CallHiveService((s) => s.IsAllowedPrivileged());
  }

  [StorableConstructor]
  protected HiveEngine(bool deserializing) : base(deserializing) { }

  /// <summary>
  /// Cloning constructor. Copies the resource names, the current operator, the priority, the
  /// execution time on the hive and the privileged flag.
  /// </summary>
  /// <param name="original">The engine to copy from.</param>
  /// <param name="cloner">The cloner that tracks already cloned objects.</param>
  protected HiveEngine(HiveEngine original, Cloner cloner)
    : base(original, cloner) {
    this.ResourceNames = original.ResourceNames;
    this.currentOperator = cloner.Clone(original.currentOperator);
    this.priority = original.priority;
    this.executionTimeOnHive = original.executionTimeOnHive;
    this.IsPrivileged = original.IsPrivileged;
    // do not clone jobs - otherwise they would be sent with every task
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new HiveEngine(this, cloner);
  }
  #endregion

  #region Events
  /// <summary>
  /// Resets <see cref="ExecutionTimeOnHive"/> after the base class has handled the event.
  /// </summary>
  protected override void OnPrepared() {
    base.OnPrepared();
    this.ExecutionTimeOnHive = TimeSpan.Zero;
  }

  /// <summary>
  /// Raised whenever <see cref="ExecutionTimeOnHive"/> changes its value.
  /// </summary>
  public event EventHandler ExecutionTimeOnHiveChanged;
  protected virtual void OnExecutionTimeOnHiveChanged() {
    // copy the delegate so a concurrent unsubscribe cannot null it between check and call
    var handler = ExecutionTimeOnHiveChanged;
    if (handler != null) handler(this, EventArgs.Empty);
  }
  #endregion

  /// <summary>
  /// Remembers the token and processes the engine's execution stack.
  /// </summary>
  /// <param name="cancellationToken">Token that is checked before each operation and while polling the hive.</param>
  protected override void Run(CancellationToken cancellationToken) {
    this.cancellationToken = cancellationToken;
    Run(ExecutionStack);
  }

  /// <summary>
  /// Processes operations until the stack is empty. Parallel operation collections are executed
  /// on the hive, sequential collections are unrolled onto the stack and atomic operations are
  /// executed locally. An operation that fails or is canceled is pushed back onto the stack
  /// before the exception leaves this method.
  /// </summary>
  /// <param name="state">The execution stack; must be a <c>Stack&lt;IOperation&gt;</c>.</param>
  /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
  /// <exception cref="OperatorExecutionException">A locally executed operator threw an exception.</exception>
  private void Run(object state) {
    Stack<IOperation> executionStack = (Stack<IOperation>)state;
    IOperation next;
    OperationCollection coll;
    IAtomicOperation operation;

    if (firstRun) {
      TaskScheduler.UnobservedTaskException += new EventHandler<UnobservedTaskExceptionEventArgs>(TaskScheduler_UnobservedTaskException);
      this.OnlinePlugins = HiveServiceLocator.Instance.CallHiveService(s => s.GetPlugins()).Where(x => x.Hash != null).ToList();
      this.AlreadyUploadedPlugins = new List<Plugin>();
      firstRun = false;
    }

    while (executionStack.Count > 0) {
      cancellationToken.ThrowIfCancellationRequested();

      next = executionStack.Pop();
      coll = next as OperationCollection;
      operation = next as IAtomicOperation;

      if (coll != null) {
        if (coll.Parallel) {
          // an empty parallel collection has nothing to send to the hive (and no first
          // operation to take the parent scope from), so it is simply skipped
          if (coll.Count == 0) continue;

          try {
            // clone the parent scope here and reuse it for each operation. otherwise for each task the whole scope-tree first needs to be copied and then cleaned, which causes a lot of work for the Garbage Collector
            IScope parentScopeClone = (IScope)((IAtomicOperation)coll.First()).Scope.Parent.Clone();
            parentScopeClone.SubScopes.Clear();
            parentScopeClone.ClearParentScopes();

            EngineTask[] engineTasks = new EngineTask[coll.Count];
            for (int i = 0; i < coll.Count; i++) {
              engineTasks[i] = new EngineTask(coll[i], new SequentialEngine.SequentialEngine());
            }

            var experiment = CreateJob();
            IScope[] scopes = ExecuteOnHive(experiment, engineTasks, parentScopeClone, cancellationToken);

            // copy the results computed on the hive back into the local scopes
            for (int i = 0; i < coll.Count; i++) {
              if (coll[i] is IAtomicOperation) {
                ExchangeScope(scopes[i], ((IAtomicOperation)coll[i]).Scope);
              } else if (coll[i] is OperationCollection) {
                // todo ??
              }
            }
          }
          catch {
            // keep the collection on the stack so that the run can be resumed
            executionStack.Push(coll);
            throw;
          }
        } else {
          // push in reverse order so that the first operation is popped first
          for (int i = coll.Count - 1; i >= 0; i--) {
            if (coll[i] != null) executionStack.Push(coll[i]);
          }
        }
      } else if (operation != null) {
        try {
          next = operation.Operator.Execute((IExecutionContext)operation, cancellationToken);
        }
        catch (OperationCanceledException) {
          executionStack.Push(operation);
          throw; // rethrow as-is; "throw ex" would reset the stack trace
        }
        catch (Exception ex) {
          executionStack.Push(operation);
          throw new OperatorExecutionException(operation.Operator, ex);
        }
        if (next != null) executionStack.Push(next);

        if (operation.Operator.Breakpoint) {
          log.LogMessage(string.Format("Breakpoint: {0}", operation.Operator.Name != string.Empty ? operation.Operator.Name : operation.Operator.ItemName));
          Pause();
        }
      }
    }
  }

  // Static on purpose: TaskScheduler.UnobservedTaskException is a static event and the handler is
  // never removed, so an instance handler would keep every engine that has run reachable.
  private static void TaskScheduler_UnobservedTaskException(object sender, UnobservedTaskExceptionEventArgs e) {
    e.SetObserved(); // avoid crash of process
  }

  /// <summary>
  /// Searches the given execution context and its parents for a value parameter named "Random".
  /// </summary>
  /// <param name="ec">The context to start with; may be <c>null</c>.</param>
  /// <returns>
  /// The value of the first matching parameter as <c>IRandom</c>, or <c>null</c> if there is none,
  /// if its value is not an <c>IRandom</c>, or if the lookup throws (best effort).
  /// </returns>
  private IRandom FindRandomParameter(IExecutionContext ec) {
    try {
      if (ec == null) {
        return null;
      }

      foreach (var p in ec.Parameters) {
        if (p.Name == "Random" && p is IValueParameter) {
          return ((IValueParameter)p).Value as IRandom;
        }
      }
      return FindRandomParameter(ec.Parent);
    }
    catch { return null; }
  }

  /// <summary>
  /// Replaces the scope content of <paramref name="target"/> with that of <paramref name="source"/>.
  /// </summary>
  private static void ReIntegrateScope(IAtomicOperation source, IAtomicOperation target) {
    ExchangeScope(source.Scope, target.Scope);
  }

  /// <summary>
  /// Replaces the variables and sub-scopes of <paramref name="target"/> with those of
  /// <paramref name="source"/>. The items are added to the target as they are (no cloning).
  /// </summary>
  private static void ExchangeScope(IScope source, IScope target) {
    target.Variables.Clear();
    target.Variables.AddRange(source.Variables);
    target.SubScopes.Clear();
    target.SubScopes.AddRange(source.SubScopes);
    // TODO: validate if parent scopes match - otherwise source is invalid
  }

  /// <summary>
  /// This method blocks until all jobs are finished
  /// TODO: Cancelation needs to be refined; all tasks currently stay in Semaphore.WaitOne after cancelation
  /// </summary>
  /// <param name="refreshableJob">The job the tasks are added to; it is disposed when this method returns or throws.</param>
  /// <param name="jobs">The engine tasks to execute, one per parallel operation.</param>
  /// <param name="parentScopeClone">Parent scope clone that is handed to every created hive task.</param>
  /// <param name="cancellationToken">Token that is passed to the hive client and checked after every polling interval.</param>
  /// <returns>The scopes of the initial operations of the job's hive tasks, in the order of the job's task collection.</returns>
  /// <exception cref="HiveEngineException">At least one task is in the failed state after the job has finished.</exception>
  /// <exception cref="OperationCanceledException">Cancellation was requested while waiting for the job.</exception>
  private IScope[] ExecuteOnHive(RefreshableJob refreshableJob, EngineTask[] jobs, IScope parentScopeClone, CancellationToken cancellationToken) {
    log.LogMessage(string.Format("Executing {0} operations on the hive.", jobs.Length));
    // allocate a real IScope[]; a Scope[] stored in an IScope[] variable would reject any
    // other IScope implementation at runtime (array covariance check)
    IScope[] scopes = new IScope[jobs.Length];

    try {
      // create upload-tasks
      for (int i = 0; i < jobs.Length; i++) {
        var engineHiveJob = new EngineHiveTask(jobs[i], parentScopeClone);
        engineHiveJob.Task.Priority = this.Priority;
        refreshableJob.HiveTasks.Add(engineHiveJob);

        // shuffle random variable to avoid the same random sequence in each operation; todo: does not yet work (it cannot find the random variable)
        IRandom random = FindRandomParameter(jobs[i].InitialOperation as IExecutionContext);
        if (random != null) {
          random.Reset(random.Next());
        }
      }
      HiveClient.StartJob((e) => { log.LogException(e); }, refreshableJob, cancellationToken);

      // do polling until experiment is finished and all jobs are downloaded
      while (!refreshableJob.AllJobsFinished()) {
        Thread.Sleep(2000);
        this.ExecutionTimeOnHive = TimeSpan.FromMilliseconds(jobs.Sum(x => x.ExecutionTime.TotalMilliseconds));
        cancellationToken.ThrowIfCancellationRequested();
      }
      log.LogMessage(string.Format("{0} finished (TotalExecutionTime: {1}).", refreshableJob.ToString(), refreshableJob.ExecutionTime));

      // a single pass is enough to find out whether (and which) task failed
      var failedTask = refreshableJob.HiveTasks.FirstOrDefault(x => x.Task.State == TaskState.Failed);
      if (failedTask != null) {
        throw new HiveEngineException("Task failed: " + failedTask.Task.StateLog.Last().Exception);
      }

      // get scopes
      int j = 0;
      foreach (var hiveJob in refreshableJob.HiveTasks) {
        var scope = ((IAtomicOperation)((EngineTask)hiveJob.ItemTask).InitialOperation).Scope;
        scopes[j++] = scope;
      }
      return scopes;
    }
    catch (OperationCanceledException) {
      // cancellation is not an error: do not log it, just pass it on unchanged
      throw;
    }
    catch (Exception e) {
      log.LogException(e);
      throw; // preserves the original stack trace
    }
    finally {
      DisposeJob(refreshableJob);
    }
  }

  /// <summary>
  /// Creates a new job named after the number of jobs created so far, configures it with the
  /// engine's resource names and privileged flag and adds it to <see cref="Jobs"/>.
  /// </summary>
  /// <returns>The refreshable wrapper of the new job.</returns>
  private RefreshableJob CreateJob() {
    lock (locker) {
      var hiveExperiment = new Job();
      hiveExperiment.Name = "HiveEngine Run " + jobs.Count;
      hiveExperiment.DateCreated = DateTime.Now;
      hiveExperiment.ResourceNames = this.ResourceNames;
      hiveExperiment.IsPrivileged = this.IsPrivileged;
      var refreshableHiveExperiment = new RefreshableJob(hiveExperiment);
      refreshableHiveExperiment.IsDownloadable = false; // download happens automatically so disable button
      jobs.Add(refreshableHiveExperiment);
      return refreshableHiveExperiment;
    }
  }

  /// <summary>
  /// Stops the automatic refresh of the job, deletes it on the hive and clears the data of its tasks.
  /// </summary>
  private void DisposeJob(RefreshableJob refreshableJob) {
    refreshableJob.RefreshAutomatically = false;
    DeleteHiveExperiment(refreshableJob.Job.Id);
    ClearData(refreshableJob);
  }

  /// <summary>
  /// Calls <c>ClearData</c> on every hive task of the given job.
  /// </summary>
  private void ClearData(RefreshableJob refreshableJob) {
    var hiveTasks = refreshableJob.GetAllHiveTasks();
    foreach (var hiveTask in hiveTasks) {
      hiveTask.ClearData();
    }
  }

  /// <summary>
  /// Asks the hive service to delete the job with the given id, via <c>HiveClient.TryAndRepeat</c>
  /// with a repetition count of 5.
  /// </summary>
  private void DeleteHiveExperiment(Guid jobId) {
    HiveClient.TryAndRepeat(() => {
      HiveServiceLocator.Instance.CallHiveService(s => s.DeleteJob(jobId));
    }, 5, "Could not delete jobs");
  }

  /// <summary>
  /// Resolves the ';'-separated names in <see cref="ResourceNames"/> to resource ids via the hive service.
  /// </summary>
  /// <returns>The ids of the resources, in the order of their names.</returns>
  /// <exception cref="ResourceNotFoundException">The service returned an empty id for one of the names.</exception>
  private List<Guid> GetResourceIds() {
    return HiveServiceLocator.Instance.CallHiveService(service => {
      var resourceNames = ResourceNames.Split(';');
      var resourceIds = new List<Guid>();
      foreach (var resourceName in resourceNames) {
        Guid resourceId = service.GetResourceId(resourceName);
        if (resourceId == Guid.Empty) {
          throw new ResourceNotFoundException(string.Format("Could not find the resource '{0}'", resourceName));
        }
        resourceIds.Add(resourceId);
      }
      return resourceIds;
    });
  }
}
|
---|
| 343 | }
|
---|