Exemplo n.º 1
0
        /// <summary>
        /// Traces every pending change conflict of <paramref name="context"/> and resolves the conflicts
        /// by keeping the local changes, then re-submits the changes once.
        /// </summary>
        /// <param name="context">data context whose ChangeConflicts collection is inspected and resolved</param>
        /// <param name="identification">short label of the failed operation; it is included in every trace message</param>
        /// <returns>true if a conflict reports that the row has been deleted; otherwise false</returns>
        private bool TraceConflicts(JobsDBDataContext context, string identification)
        {
            bool anyResolved = false;

            foreach (var conflict in context.ChangeConflicts)
            {
                if (conflict.IsDeleted)
                {
                    // The row is gone: there is nothing to resolve or re-submit.
                    JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + " {0} : job is deleted unexpectedly", identification);
                    return true;
                }

                foreach (var item in conflict.MemberConflicts)
                {
                    // Note: the format string only consumes arguments {3}..{7}; the first three arguments are passed but not printed.
                    JobManager.JobManagerTrace.TraceInfo(this.ToShortString() + " {7} : conflict member {3} origin:{4} current:{5} database:{6}", job.Hash, job.PartNo, job.PartsCount,
                                                         item.Member.Name, item.OriginalValue, item.CurrentValue, item.DatabaseValue, identification);
                }
                conflict.Resolve(RefreshMode.KeepChanges);
                anyResolved = true;
            }

            // Submit once, after the enumeration has finished: submitting inside the loop would modify
            // the ChangeConflicts collection while it is still being enumerated. The submit goes to the
            // same context whose conflicts were just resolved.
            if (anyResolved)
            {
                context.SubmitChanges(ConflictMode.FailOnFirstConflict);
            }
            return false;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a job manager bound to the jobs database and to the blob storage that holds the results.
        /// </summary>
        /// <param name="sqlConnectionString">connection string passed to the jobs data context</param>
        /// <param name="resultsStorageConnectionString">storage connection string; unless it contains "UseDevelopmentStorage=", it must carry an AccountKey made of 86 BASE64 characters followed by "=="</param>
        /// <exception cref="ArgumentNullException"><paramref name="resultsStorageConnectionString"/> is null</exception>
        /// <exception cref="ArgumentException">the account key cannot be extracted from <paramref name="resultsStorageConnectionString"/></exception>
        public JobManager(string sqlConnectionString, string resultsStorageConnectionString)
        {
            if (resultsStorageConnectionString == null)
            {
                throw new ArgumentNullException("resultsStorageConnectionString");
            }

            // Validate the storage connection string before any disposable resource is created.
            isDevelopmentStorage = resultsStorageConnectionString.Contains("UseDevelopmentStorage=");
            if (!isDevelopmentStorage)
            {
                // extract a BASE64-encoded 64-byte account key from the connection string
                var m = System.Text.RegularExpressions.Regex.Match(
                    resultsStorageConnectionString,
                    "AccountKey=([A-Za-z0-9+/]{86}==)");
                if (!m.Success)
                {
                    var msg = "Cannot extract account key from the supplied storage connection string";
                    JobManagerTrace.TraceError(msg);
                    // ArgumentException derives from Exception, so existing catch blocks keep working.
                    throw new ArgumentException(msg);
                }
                storageAccountKey = m.Groups[1].Value;
            }

            jobsDataContext = new JobsDBDataContext(sqlConnectionString);

            CloudStorageAccount csa = CloudStorageAccount.Parse(resultsStorageConnectionString);
            var blobClient          = csa.CreateCloudBlobClient();

            resultsContainer = blobClient.GetContainerReference(FetchRequestContainer);
            TryCreateRequestsContainer();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Initializes the running job from a job record, the data context and the settings supplied by the caller.
        /// </summary>
        /// <param name="job">job record; its Hash, PartNo and PartsCount are copied into this instance</param>
        /// <param name="context">data context stored for later use by this instance</param>
        /// <param name="settings">settings stored for later use by this instance</param>
        protected RunningJob(Job job, JobsDBDataContext context, JobSettings settings)
        {
            // Keep the collaborators handed in by the caller.
            this.jobsDataContext = context;
            this.settings        = settings;
            this.job             = job;

            // Cache the identifying values of the job record.
            partsCount = job.PartsCount;
            partNo     = job.PartNo;
            Hash       = job.Hash;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates the Jobs table in the database if it doesn't exist or waits until it is created
        /// </summary>
        /// <remarks>
        /// The schema check is a query counting the pending jobs; any <see cref="SqlException"/> raised by that query
        /// is treated as "the schema is missing". The check is repeated until it succeeds.
        /// </remarks>
        /// <param name="sqlConnString">a DB to check</param>
        /// <param name="isInitiator">if true the table will be created, otherwise the method is blocked until the table is created by someone else</param>
        public static void InitializeJobTable(string sqlConnString, bool isInitiator)
        {
            // The data context is disposable; release it when the initialization is over.
            using (var context = new JobsDBDataContext(sqlConnString))
            {
                bool jobsSchemaExists = false;

                while (!jobsSchemaExists)
                {
                    try
                    {
                        JobManagerTrace.TraceInfo("Connected to jobs database. {0} job(s) are pending.",
                                                  (from j in context.Jobs where j.Status == (byte)JobOrPartState.Pending select j.Hash).Count());
                        jobsSchemaExists = true;
                    }
                    catch (SqlException)
                    {
                        if (!isInitiator)
                        {
                            JobManagerTrace.TraceInfo("Jobs database doesn't contain expected schema. Waiting for the frontend (role index 0) to initialize the schema. Rechecking JobsDB in couple of seconds");
                            Thread.Sleep(TimeSpan.FromSeconds(10));
                            continue;
                        }

                        JobManagerTrace.TraceInfo("Jobs database doesn't contain expected schema. Deploying the schema");

                        StringBuilder sqlText = new StringBuilder();
                        using (System.IO.StreamReader reader = new System.IO.StreamReader(System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream("Microsoft.Research.Science.FetchClimate2.Jobs.sql")))
                        {
                            while (!reader.EndOfStream)
                            {
                                var line = reader.ReadLine();

                                // Skip only the batch separator lines. A plain substring test would also drop
                                // regular statements that merely contain the letters "GO".
                                bool isBatchSeparator = string.Equals(line.Trim(), "GO", StringComparison.OrdinalIgnoreCase);
                                if (!isBatchSeparator)
                                {
                                    sqlText.AppendLine(line);
                                }
                            }
                        }
                        context.ExecuteCommand(sqlText.ToString());
                        JobManagerTrace.TraceInfo("Schema for JobsDB successfully deployed");
                        // The loop now repeats the check query to confirm that the schema is usable.
                    }
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Body of the touch thread: periodically updates the touch time of the job record, tracks peak memory
        /// statistics of the process, marks long running jobs as heavy and aborts the working thread when too many
        /// other heavy parts are active.
        /// </summary>
        /// <remarks>
        /// The loop ends when the stop event is signalled, when the job record appears to be deleted,
        /// or when unexpected errors keep occurring.
        /// </remarks>
        /// <param name="state">a Tuple&lt;Job, JobsDBDataContext, ManualResetEvent&gt;: the job to touch, the data context to use and the event that requests the thread to stop</param>
        private void TouchThreadFunc(object state)
        {
            var threadArgs = (Tuple<Job, JobsDBDataContext, ManualResetEvent>)state;
            Job job = threadArgs.Item1;
            JobsDBDataContext jobsDataContext = threadArgs.Item2;
            ManualResetEvent  stopEvent       = threadArgs.Item3;
            long   curWorkingSet, curGCTotal, curPrivate;
            long   maxWorkingSet = 0;
            long   maxGCTotal = 0;
            long   maxPrivate = 0;
            double curRamAvailMb, minRamAvailMb = double.MaxValue;
            int    fails = 0;
            bool   jobDeleted = false;

            // The performance counter is disposable; release it when the thread function ends.
            using (var ramCounter = new System.Diagnostics.PerformanceCounter("Memory", "Available MBytes"))
            {
                JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":Touch thread running");

                while (true)
                {
                    // Sample the memory statistics. Process.GetCurrentProcess() returns a new disposable
                    // object on every call, so it is released right after sampling.
                    using (Process currentProcess = Process.GetCurrentProcess())
                    {
                        currentProcess.Refresh();
                        curWorkingSet = Environment.WorkingSet;
                        curGCTotal    = GC.GetTotalMemory(false);
                        curPrivate    = currentProcess.PrivateMemorySize64;
                    }
                    curRamAvailMb = ramCounter.NextValue();
                    if (maxWorkingSet < curWorkingSet)
                    {
                        maxWorkingSet = curWorkingSet;
                    }
                    if (maxGCTotal < curGCTotal)
                    {
                        maxGCTotal = curGCTotal;
                    }
                    if (maxPrivate < curPrivate)
                    {
                        maxPrivate = curPrivate;
                    }
                    if (minRamAvailMb > curRamAvailMb)
                    {
                        minRamAvailMb = curRamAvailMb;
                    }
                    try
                    {
                        bool cancelationCheckNeeded = false;
                        lock (jobsDataContext)
                        {
                            JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Touch thread tick", job);

                            // Update the touch time, retrying up to 5 times on optimistic concurrency conflicts.
                            bool touched = false;
                            for (int touchTries = 0; touchTries < 5; touchTries++)
                            {
                                try
                                {
                                    jobsDataContext.Refresh(RefreshMode.KeepChanges, job);
                                    job.Touchtime = DateTime.UtcNow;
                                    jobsDataContext.SubmitChanges();
                                    touched = true;
                                    break;
                                }
                                catch (ChangeConflictException)
                                {
                                    JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Failed to update touchtime. attempt {1}", job, touchTries + 1);
                                    jobsDataContext.ChangeConflicts.ResolveAll(RefreshMode.OverwriteCurrentValues);
                                }
                            }
                            if (touched)
                            {
                                JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Updated TouchTime", job);
                            }
                            else
                            {
                                JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + ":{0} Failed to update TouchTime", job);
                            }

                            if (job.IsHeavyJob == 0)
                            {
                                var execTimespanSec = (DateTime.UtcNow - job.StartTime.Value).TotalSeconds;
                                if (execTimespanSec > settings.LightJobExecutionPermitedTimeSec)
                                {
                                    JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Job considered to be HEAVY", job);

                                    // Flag all records sharing the hash of this job as heavy, retrying up to 10 times on conflicts.
                                    bool heavyFlagCommitted = false;
                                    for (int retries = 0; retries < 10; retries++)
                                    {
                                        try
                                        {
                                            var allParts = jobsDataContext.Jobs.Where(j1 => j1.Hash == job.Hash).ToArray();
                                            foreach (var j in allParts)
                                            {
                                                j.IsHeavyJob = 1;
                                            }

                                            JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Committing HEAVY flag for all jobs part", job);
                                            jobsDataContext.Refresh(RefreshMode.KeepChanges, allParts);
                                            jobsDataContext.SubmitChanges(ConflictMode.ContinueOnConflict);
                                            heavyFlagCommitted = true;
                                            break;
                                        }
                                        catch (ChangeConflictException)
                                        {
                                            JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Failed to set heavy flag for all parts. attempt {1}", job, retries + 1);
                                            jobsDataContext.ChangeConflicts.ResolveAll(RefreshMode.OverwriteCurrentValues);
                                        }
                                    }

                                    // Report success only when one of the attempts actually went through.
                                    if (heavyFlagCommitted)
                                    {
                                        JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Successfully committed HEAVY flag for all jobs part", job);
                                    }
                                    else
                                    {
                                        JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + ":{0} Failed to commit HEAVY flag for all jobs part", job);
                                    }

                                    cancelationCheckNeeded = true;
                                }
                                else
                                {
                                    JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} touched job part record. Job still light ({1} execution seconds of {2} permitted)", job, execTimespanSec, settings.LightJobExecutionPermitedTimeSec);
                                }
                            }
                            else
                            {
                                JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} touched job part record. Job is already marked as heavy", job);
                                cancelationCheckNeeded = true;
                            }

                            if (cancelationCheckNeeded)
                            {
                                JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Checking for heavy job cancelation", job);
                                // Counts the heavy records with Status == 1 that started earlier than this job
                                // (presumably status 1 means "running" - confirm against the status enumeration).
                                int otherRunningHeavyPartsCount = jobsDataContext.Jobs.Count(j2 => j2.StartTime < job.StartTime && j2.Status == 1 && j2.IsHeavyJob == 1);
                                if (otherRunningHeavyPartsCount >= settings.PermitedHeavyPartWorkers)
                                {
                                    JobManager.JobManagerTrace.TraceInfo(this.ToShortString() + ":{0} Aborting working thread as there are {1} other active heavy parts ({2} total permitted)", job, otherRunningHeavyPartsCount, settings.PermitedHeavyPartWorkers);
                                    if (workingThread != null)
                                    {
                                        workingThread.Abort();
                                    }
                                }
                                else
                                {
                                    JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} Continuing heavy part as there are {1} other active heavy parts ({2} total permitted)", job, otherRunningHeavyPartsCount, settings.PermitedHeavyPartWorkers);
                                }
                            }
                        }

                        // Sleep until the next tick; a signalled stop event ends the loop instead.
                        if (stopEvent.WaitOne(TimeSpan.FromSeconds(settings.TouchPeriodInSeconds)))
                        {
                            JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":{0} detected touch thread stop request. stopping touch thread loop...", job);
                            break;
                        }
                    }
                    catch (InvalidOperationException exc)
                    {
                        jobDeleted = true;
                        JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + ":{0} InvalidOperationException. Is job part deleted? ({1})", job, exc.ToString());
                        break;
                    }
                    catch (ChangeConflictException)
                    {
                        lock (jobsDataContext)
                        {
                            // TraceConflicts returns true when a conflict reports the record as deleted.
                            if (TraceConflicts(jobsDataContext, "last chance exc"))
                            {
                                jobDeleted = true;
                                break;
                            }
                        }
                    }
                    catch (Exception exc)
                    {
                        JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + ":{0} Error updating touch time for job {1}", job, exc.ToString());
                        // Gives up on the fifth unexpected error (the counter is compared before it is incremented).
                        if (fails++ > 3)
                        {
                            break;
                        }
                    }
                }
            }

            // Reported after the loop: every path that detects the deletion leaves the loop immediately.
            if (jobDeleted)
            {
                JobManager.JobManagerTrace.TraceWarning(this.ToShortString() + ":{0} record is deleted. stoping touch thread", job);
            }

            JobManager.JobManagerTrace.TraceVerbose(this.ToShortString() + ":Touch thread ending. Peak mem stats: working set {0}Mb, GC.allocated {1}Mb, PrivateMem {2}Mb, min available Ram {3}Mb",
                                                    ((double)maxWorkingSet) / (1024 * 1024), ((double)maxGCTotal) / (1024 * 1024), ((double)maxPrivate) / (1024 * 1024), minRamAvailMb);
        }