/// <summary> /// Update Stage in DB /// and lock the project /// </summary> /// <param name="crawlId"></param> /// <param name="resources"></param> /// <returns></returns> public static bool UpdateCrawlStatsStage(string crawlId, KitsuneKrawlerStatusCompletion stage) { try { if (_server == null) { InitiateConnection(); } var urlCollection = _kitsuneDB.GetCollection <KitsuneKrawlerStats>(EnvironmentConstants.ApplicationConfiguration.MongoDBCollections.KitsuneKrawlStatsCollection); //Filter Defination var fdb = new FilterDefinitionBuilder <KitsuneKrawlerStats>(); var filter = fdb.Where(x => x.ProjectId == crawlId); //Update Defination var udb = new UpdateDefinitionBuilder <KitsuneKrawlerStats>(); var update = udb.Set(x => x.Stage, stage) .Set(x => x.IsLocked, false); //Process var result = urlCollection.UpdateOne(filter, update); return(true); } catch (Exception ex) { return(false); } }
public static void Process() { try { while (true) { string projectId = String.Empty; KitsuneKrawlerStatusCompletion stage = KitsuneKrawlerStatusCompletion.Error; try { var amazonSqsQueueHandler = new AmazonSQSQueueHandlers <KrawlSQSModel>(EnvironmentConstants.ApplicationConfiguration.CrawlerSQSUrl); //var task = amazonSqsQueueHandler.ReceiveMessageFromQueue(awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); var task = new AmazonAWSHelpers.Models.AmazonSQSMessageQueueModel <KrawlSQSModel>() { MessageBody = new KrawlSQSModel { ProjectId = "5ce4ef18abc486000121acb8", ReCrawl = true } }; if (task != null) { projectId = task.MessageBody.ProjectId; if (!String.IsNullOrEmpty(projectId)) { try { #region Initiate Logger Logger.InitLogger(awsCloudWatchConfig.AWSAccessKey, awsCloudWatchConfig.AWSSecretKey, LogGroup, projectId); #endregion #region Before Process try { ServiceInformationHelper serviceInfo = new ServiceInformationHelper(); Log.Information($"ProjectId : {projectId}, IP: {serviceInfo.GetInstancePrivateIpAddress()}"); } catch { } Uri uri = null; //Get the Details from DB var crawlDetails = MongoHelper.GetCrawlingDetails(projectId); if (crawlDetails == null) { throw new Exception("CrawlDetails was null"); } if (!Uri.TryCreate(crawlDetails.Url, UriKind.Absolute, out uri)) { throw new Exception(String.Format("Error Creating Uri from Url : {0}", crawlDetails.Url)); } stage = crawlDetails.Stage; #endregion #region Process var isTaskCompleted = false; Log.Information($"Started, Stage: {stage.ToString()}"); try { switch (stage) { case KitsuneKrawlerStatusCompletion.Initialising: InitialiseKrawlerStageHelper.InitialiseKrawler(projectId, uri); isTaskCompleted = true; break; case KitsuneKrawlerStatusCompletion.IdentifyingAllAssetsAndDownloadingWebpage: MigrationStageHelper.AnalyseTheWebsite(projectId, uri, crawlDetails.CrawlType.Equals(KrawlType.DeepKrawl)); isTaskCompleted = true; break; case KitsuneKrawlerStatusCompletion.DownloadingAllStaticAssetsToStorage: ResourcesStageHelper.DownloadTheResources(projectId, uri); isTaskCompleted = true; break; case KitsuneKrawlerStatusCompletion.UpdatingWebPagesWithNewStaticAssetUri: PlaceHolderReplacerHelper.ReplacePlaceHolder(projectId, uri); isTaskCompleted = true; break; default: break; } } catch (Exception ex) { MongoHelper.UpdateCrawlErrorMessage(projectId, new KrawlError { ErrorMessage = ex.Message, Stage = stage }); } Log.Information($"Completed, Stage: {stage.ToString()}"); #endregion #region After Process amazonSqsQueueHandler.DeleteMessageFromQueue(task, awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); if (isTaskCompleted) { stage += 1; MongoHelper.UpdateCrawlStatsStage(projectId, stage); //Crawling completed successfully if (stage == KitsuneKrawlerStatusCompletion.Completed) { try { APIHelper.KrawlingCompletedUpdateKitsuneProjects(projectId); } catch (Exception ex) { Log.Error(ex, $"ProjectId:{projectId}, Message:Error updating DB after completion"); } } //If need furthur Process again push to sqs if (stage != KitsuneKrawlerStatusCompletion.IdentifyingExternalDomains && stage != KitsuneKrawlerStatusCompletion.Error && stage < KitsuneKrawlerStatusCompletion.Completed) { amazonSqsQueueHandler.PushMessageToQueue(task.MessageBody, awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); } //Event- Analyse Completed (select the domains to download and start next stage) if (stage == KitsuneKrawlerStatusCompletion.IdentifyingExternalDomains) { if (task.MessageBody.ReCrawl) { MongoHelper.UpdateCrawlStatsStage(projectId, stage + 1); amazonSqsQueueHandler.PushMessageToQueue(task.MessageBody, awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); } else { APIHelper.RegisterAnalyseCompleteEvent(projectId); } } } else { Log.Error($"ProjectId:{projectId}, Message:Error as isTaskCompleted was false for projectId: {projectId}"); } #endregion } catch (Exception ex) { //Handle if any exception rises Log.Error($"ProjectId:{projectId}, Message:Error while Processing the project with Error : {ex.Message}"); MongoHelper.UpdateCrawlErrorMessage(projectId, new KrawlError { ErrorMessage = ex.Message, Stage = stage }); amazonSqsQueueHandler.DeleteMessageFromQueue(task, awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); } Logger.InitLogger(awsCloudWatchConfig.AWSAccessKey, awsCloudWatchConfig.AWSSecretKey, LogGroup); } else { Log.Error($"ProjectId:{projectId}, Message:Error while processing the Service as the projectId was null"); amazonSqsQueueHandler.DeleteMessageFromQueue(task, awsSQSConfig.AWSAccessKey, awsSQSConfig.AWSSecretKey); } } } catch (Exception ex) { //Error picking message from sqs //Error Deleting message from sqs Log.Error(ex, $"ProjectId:{projectId}, Message:Error while processing the Service after getting the value"); } } } catch (Exception ex) { Console.WriteLine($"Message:Error while polling from SQS, Exception : {ex.ToString()}"); } }