/// <summary>
/// Prepares the service referenced by <paramref name="settings"/>: the service is stored as busy,
/// the process is reported as finished and the service is then stored as prepared.
/// </summary>
/// <param name="processId">Identifier passed to the process handler for every status report.</param>
/// <param name="settings">Settings whose <c>ServiceId</c> selects the service to prepare.</param>
/// <param name="token">Cancellation token; it is not read by this overload.</param>
/// <remarks>
/// Exceptions are not propagated. On any failure the service is reloaded, stored with the
/// <c>New</c> status and the process is reported as cancelled or interrupted.
/// </remarks>
public void Prepare(string processId, SearchSettingsWrapperElastic settings, CancellationToken token)
{
    try
    {
        var preparedService = serviceQuery.Get(settings.ServiceId);

        preparedService.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(preparedService.Id, preparedService);

        // NOTE(review): the message names ServiceTypeEnum.Classifier although this overload takes
        // search settings - confirm that this is intended.
        var finishedMessage = string.Format(
            ServiceResources.SuccessfullyPrepared_0_Service_1,
            ServiceTypeEnum.Classifier,
            preparedService.Name);
        processHandler.Finished(processId, finishedMessage);

        preparedService.Status = (int)ServiceStatusEnum.Prepared;
        serviceQuery.Update(preparedService.Id, preparedService);
    }
    catch (Exception ex)
    {
        // Roll the stored service back to its initial status before reporting the failure.
        var failedService = serviceQuery.Get(settings.ServiceId);
        failedService.Status = (int)ServiceStatusEnum.New;
        serviceQuery.Update(failedService.Id, failedService);

        // Only a wrapped cancellation (carried as the inner exception) counts as a cancel.
        var wasCancelled = ex.InnerException is OperationCanceledException;
        if (!wasCancelled)
        {
            processHandler.Interrupted(processId, ex);
            return;
        }

        processHandler.Cancelled(processId);
    }
}
/// <summary>
/// Prepares a classifier service: for every n-gram in <c>settings.NGramList</c> and every tag in
/// <c>settings.Tags</c> a dictionary is computed and serialized into
/// <c>{service directory}/{nGram}/</c>, while progress is reported through the process handler.
/// </summary>
/// <param name="processId">Identifier passed to the process handler for every status report.</param>
/// <param name="settings">Classifier settings (service id, dataset name, n-grams, tags, compress settings).</param>
/// <param name="token">Checked at the start of every tag iteration; cancellation aborts the preparation.</param>
/// <remarks>
/// Exceptions are not propagated. On any failure the service is stored with the <c>New</c> status,
/// the service directory is deleted and the process is reported as cancelled or interrupted.
/// </remarks>
public void Prepare(string processId, ClassifierSettingsElastic settings, CancellationToken token)
{
    var directoryPath = GetDirectoryPath(settings.ServiceId);

    try
    {
        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(service.Id, service);

        // Start from a clean directory so no files of a previous preparation survive.
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        var storedDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
        var dataSet = storedDataSet.DataSet;
        var totalDictionaries = settings.NGramList.Count * settings.Tags.Count;
        var finishedDictionaries = 0;
        var progressGate = new object();

        Directory.CreateDirectory(directoryPath);

        foreach (var nGram in settings.NGramList)
        {
            var subsetCreator = new SubsetCreator(
                dataSet.Name,
                new List<string> { DocumentElastic.TextField },
                dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(),
                nGram,
                queryFactory,
                storedDataSet.AttachmentFields);

            var nGramDirectory = string.Format("{0}/{1}", directoryPath, nGram);
            Directory.CreateDirectory(nGramDirectory);

            Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, state) =>
            {
                token.ThrowIfCancellationRequested();

                var subset = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
                var algorithm = new TwisterAlgorithm(
                    subset,
                    true,
                    false,
                    settings.CompressSettings.CompressCategoryOccurence,
                    settings.CompressSettings.CompressDataSetOccurence,
                    (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);
                var dictionary = algorithm.GetDictionary();

                var dictionaryProtoBuf = new DictionaryProtoBuf
                {
                    Id = tag.Id,
                    Dictionary = dictionary,
                    NGram = nGram
                };

                // Serialization and the shared progress counter are guarded by the same lock.
                lock (progressGate)
                {
                    var targetFile = string.Format("{0}/{1}", nGramDirectory, dictionaryProtoBuf.GetFileName());
                    dictionaryProtoBuf.Serialize(targetFile);

                    finishedDictionaries++;
                    var percent = Math.Round(finishedDictionaries / (double)totalDictionaries * 100, 2);
                    processHandler.Changed(processId, percent);
                }
            });
        }

        var finishedMessage = string.Format(
            ServiceResources.SuccessfullyPrepared_0_Service_1,
            ServiceTypeEnum.Classifier,
            service.Name);
        processHandler.Finished(processId, finishedMessage);

        service.Status = (int)ServiceStatusEnum.Prepared;
        serviceQuery.Update(service.Id, service);
    }
    catch (Exception ex)
    {
        // Roll the stored service back and remove whatever was written so far.
        var failedService = serviceQuery.Get(settings.ServiceId);
        failedService.Status = (int)ServiceStatusEnum.New;
        serviceQuery.Update(failedService.Id, failedService);
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        // Exceptions thrown inside the parallel loop body arrive wrapped, so the cancellation
        // raised by the token check shows up as the inner exception.
        var wasCancelled = ex.InnerException is OperationCanceledException;
        if (!wasCancelled)
        {
            processHandler.Interrupted(processId, ex);
            return;
        }

        processHandler.Cancelled(processId);
    }
}
/// <summary>
/// Copies the given documents from <paramref name="dataSetName"/> into
/// <paramref name="targetDataSetName"/> in parallel batches and, when <paramref name="isMove"/>
/// is set, deletes the successfully indexed documents from the source afterwards. The per-document
/// results are appended as JSON to <c>{processId}.json</c> in the user directory.
/// </summary>
/// <param name="processId">Identifier used for status reports and as the result file name.</param>
/// <param name="dataSetName">Name of the source dataset.</param>
/// <param name="documentIds">Ids of the documents to copy or move; enumerated exactly once.</param>
/// <param name="targetDataSetName">Name of the target dataset.</param>
/// <param name="parallelLimit">Maximum degree of parallelism of the batch loop.</param>
/// <param name="isMove">True to delete successfully indexed documents from the source.</param>
/// <param name="token">Checked before each batch; once cancelled, no further batch is processed.</param>
/// <param name="hostUrl">Host prefix of the download URL placed in the finished message.</param>
/// <remarks>
/// Exactly one terminal state is reported: <c>Finished</c> is only sent when the run was neither
/// cancelled nor interrupted, matching how the <c>Prepare</c> methods report their outcome.
/// Results gathered before an abort are still flushed, applied (for moves) and written to the file.
/// </remarks>
public void CopyOrMove(string processId, string dataSetName, IEnumerable<string> documentIds, string targetDataSetName, int parallelLimit, bool isMove, CancellationToken token, string hostUrl)
{
    var results = new BulkResults();

    //// TODO: Validate target schema
    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = parallelLimit };
    var sourceDocumentQuery = DocumentQuery(dataSetName);
    var targetDocumentQuery = DocumentQuery(targetDataSetName);

    // Materialize once: the sequence is both counted and batched, and enumerating a lazy
    // sequence twice could repeat expensive work or yield different items.
    var documentIdList = documentIds.ToList();
    var allCount = documentIdList.Count;

    // Guards the shared result collection and the abort flags, which are touched by several workers.
    var syncRoot = new object();
    var aborted = false;
    var cancellationReported = false;

    Parallel.ForEach(
        documentIdList.Batch(siteConfig.Resources.MaxSearchBulkCount),
        parallelOptions,
        (batchIds, loopState) =>
        {
            try
            {
                if (token.IsCancellationRequested)
                {
                    lock (syncRoot)
                    {
                        aborted = true;

                        // Every pending batch sees the cancelled token; report it only once.
                        if (!cancellationReported)
                        {
                            cancellationReported = true;
                            processHandler.Cancelled(processId);
                        }
                    }

                    loopState.Stop();
                    return;
                }

                var batchDocuments = sourceDocumentQuery.Get(batchIds);
                var interpretedFields = DataSet(dataSetName).DataSet.InterpretedFields;

                // Documents without text get it rebuilt from their interpreted fields.
                foreach (var document in batchDocuments.Where(doc => string.IsNullOrEmpty(doc.Text)))
                {
                    document.Text = DocumentHelper.GetConcatenatedText(document.DocumentObject, interpretedFields);
                }

                var bulkResponse = targetDocumentQuery.Index(batchDocuments);
                var batchResults = bulkResponse.ToBulkResult();

                // The result collection is appended to from several threads; serialize the
                // append and the count read so no entries are lost and the percentage is consistent.
                double percent;
                lock (syncRoot)
                {
                    results.Results.AddRange(batchResults);
                    percent = Math.Round(results.Results.Count / (double)allCount * 100, 2);
                }

                processHandler.Changed(processId, percent);
            }
            catch (Exception ex)
            {
                lock (syncRoot)
                {
                    aborted = true;
                }

                processHandler.Interrupted(processId, ex);
                loopState.Stop();
            }
        });

    targetDocumentQuery.Flush();

    var succeedDocumentIds = results.Results.Where(r => r.StatusCode == StatusCodes.Status200OK).Select(r => r.Id).ToList();
    var failedDocumentIds = results.Results.Where(r => r.StatusCode != StatusCodes.Status200OK).Select(r => r.Id).ToList();

    if (isMove && succeedDocumentIds.Any())
    {
        sourceDocumentQuery.Delete(succeedDocumentIds);
        sourceDocumentQuery.Flush();
    }

    // save the response
    var fileName = string.Format("{0}.json", processId);
    var resultPath = string.Format("{0}/{1}", siteConfig.Directory.User, fileName);
    File.AppendAllText(resultPath, JsonConvert.SerializeObject(results.Results.OrderByDescending(r => r.StatusCode), Formatting.Indented));

    // The process was already reported as cancelled or interrupted; do not report it as finished too.
    if (aborted)
    {
        return;
    }

    var url = string.Format("{0}{1}/{2}", hostUrl, Constants.FilesPath, fileName);
    processHandler.Finished(
        processId,
        string.Format(
            "{0}\n{1}",
            string.Format(
                isMove
                    ? DocumentResources.MoveFinishedFrom_0_To_1_Succeeded_2_Failed_3
                    : DocumentResources.CopyFinishedFrom_0_To_1_Succeeded_2_Failed_3,
                dataSetName,
                targetDataSetName,
                succeedDocumentIds.Count,
                failedDocumentIds.Count),
            string.Format(DocumentResources.ResultFileCanBeDownloadFromHere_0, url)));
}
/// <summary>
/// Prepares a PRC service: for every tag in <c>settings.Tags</c> a dictionary and a reduced
/// subset are computed and serialized into the service directory, while progress is reported
/// through the process handler and the steps are logged.
/// </summary>
/// <param name="processId">Identifier passed to the process handler and used in the log prefix.</param>
/// <param name="settings">PRC settings (service id, dataset name, tags, compress settings).</param>
/// <param name="token">Checked at the start of every tag iteration; cancellation aborts the preparation.</param>
/// <remarks>
/// Exceptions are not propagated. On any failure the service is stored with the <c>New</c> status,
/// the service directory is deleted and the process is reported as cancelled or interrupted.
/// </remarks>
public void Prepare(string processId, PrcSettingsElastic settings, CancellationToken token)
{
    var directoryPath = GetDirectoryPath(settings.ServiceId);

    try
    {
        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(service.Id, service);

        // Start from a clean directory so no files of a previous preparation survive.
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        var storedDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
        var dataSet = storedDataSet.DataSet;
        var progress = new Progress(settings.Tags.Count);
        var subsetCreator = new SubsetCreator(
            dataSet.Name,
            new List<string> { DocumentElastic.TextField },
            dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(),
            1,
            queryFactory,
            storedDataSet.AttachmentFields);

        Directory.CreateDirectory(directoryPath);

        var logPrefix = $"Prc Prepare {processId}";
        logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit}, Tags Count: {settings.Tags.Count}");

        var serializationGate = new object();

        Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, state) =>
        {
            token.ThrowIfCancellationRequested();
            logger.LogInformation($"{logPrefix} preparing Tag: `{tag}`");

            var subset = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
            var algorithm = new TwisterAlgorithm(
                subset,
                true,
                true,
                settings.CompressSettings.CompressCategoryOccurence,
                settings.CompressSettings.CompressDataSetOccurence,
                (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);
            algorithm.InitTagDictionary();

            // Words of the subset whose tag-dictionary entry does not have a positive PMI are dropped below.
            var subsetWords = subset.WordsWithOccurences.Keys;
            var positivePmiWords = algorithm.TagDictionary
                .Where(entry => entry.Value.PMI > 0)
                .Select(entry => entry.Key);
            var notNeededWords = subsetWords.Except(positivePmiWords).ToList();

            // The dictionary is taken before the subset is reduced.
            var tagDictionary = algorithm.GetDictionary();
            notNeededWords.ForEach(word => subset.WordsWithOccurences.Remove(word));

            lock (serializationGate)
            {
                //dictionary serialization
                var dictionaryProtoBuf = new DictionaryProtoBuf
                {
                    Id = tag.Id,
                    Dictionary = tagDictionary,
                    NGram = 1
                };
                var dictionaryFile = string.Format("{0}/{1}", directoryPath, dictionaryProtoBuf.GetFileName());
                dictionaryProtoBuf.Serialize(dictionaryFile);

                //subset serialization
                var subsetProtoBuf = new SubsetProtoBuf
                {
                    Id = tag.Id,
                    WordsWithOccurences = subset.WordsWithOccurences,
                    AllWordsOccurencesSumInTag = subset.AllWordsOccurencesSumInTag,
                    AllOccurencesSumInCorpus = subset.AllWordsOccurencesSumInCorpus
                };
                var subsetFile = string.Format("{0}/{1}", directoryPath, subsetProtoBuf.GetFileName());
                subsetProtoBuf.Serialize(subsetFile);

                progress.Step();
                processHandler.Changed(processId, progress.Percent.Round(2));
            }

            logger.LogInformation($"{logPrefix} prepared Tag: `{tag}`");
        });

        var finishedMessage = string.Format(
            ServiceResources.SuccessfullyPrepared_0_Service_1,
            ServiceTypeEnum.Prc,
            service.Name);
        processHandler.Finished(processId, finishedMessage);

        service.Status = (int)ServiceStatusEnum.Prepared;
        serviceQuery.Update(service.Id, service);
    }
    catch (Exception ex)
    {
        // Roll the stored service back and remove whatever was written so far.
        var failedService = serviceQuery.Get(settings.ServiceId);
        failedService.Status = (int)ServiceStatusEnum.New;
        serviceQuery.Update(failedService.Id, failedService);
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        // Exceptions thrown inside the parallel loop body arrive wrapped, so the cancellation
        // raised by the token check shows up as the inner exception.
        var wasCancelled = ex.InnerException is OperationCanceledException;
        if (!wasCancelled)
        {
            processHandler.Interrupted(processId, ex);
            return;
        }

        processHandler.Cancelled(processId);
    }
}