/// <summary>
/// Prepares a Prc service: for every tag in <paramref name="settings"/> a subset and a tag
/// dictionary are built and serialized into the service's directory, while progress is
/// reported through <c>processHandler</c>.
/// </summary>
/// <param name="processId">Identifier passed to <c>processHandler</c> for progress, finish, cancel and interrupt notifications.</param>
/// <param name="settings">Supplies the service id, data set name, tags and compress settings.</param>
/// <param name="token">Checked at the start of each tag's work item; cancellation aborts the preparation.</param>
/// <remarks>
/// Exceptions are not propagated to the caller. On any failure the service status is reset to
/// <c>New</c>, the service directory is deleted, and <c>processHandler</c> is notified as either
/// cancelled or interrupted.
/// </remarks>
public void Prepare(string processId, PrcSettingsElastic settings, CancellationToken token)
{
    var directoryPath = GetDirectoryPath(settings.ServiceId);
    try
    {
        // Mark the service busy for the duration of the preparation.
        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(service.Id, service);

        // Start from a clean directory; it is recreated below before any file is written.
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        var globalStoreDataSet = GlobalStore.DataSets.Get(settings.DataSetName);
        var dataSet = globalStoreDataSet.DataSet;
        var progress = new Progress(settings.Tags.Count);
        var subsetCreator = new SubsetCreator(
            dataSet.Name,
            new List<string> { DocumentElastic.TextField },
            dataSet.InterpretedFields.Select(DocumentQuery.MapDocumentObjectName).ToList(),
            1,
            queryFactory,
            globalStoreDataSet.AttachmentFields);

        Directory.CreateDirectory(directoryPath);

        var logPrefix = $"Prc Prepare {processId}";
        logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit}, Tags Count: {settings.Tags.Count}");

        var lockObject = new object();
        Parallel.ForEach(settings.Tags, parallelService.ParallelOptions(), (tag, loopState) =>
        {
            token.ThrowIfCancellationRequested();
            logger.LogInformation($"{logPrefix} preparing Tag: `{tag}`");

            var subset = subsetCreator.CreateByTag(tag.Id, dataSet.TagField);
            var algorithm = new TwisterAlgorithm(
                subset,
                true,
                true,
                settings.CompressSettings.CompressCategoryOccurence,
                settings.CompressSettings.CompressDataSetOccurence,
                (LogicalOperatorEnum)settings.CompressSettings.CompressOperator);
            algorithm.InitTagDictionary();

            // Words whose tag-dictionary entry does not have a positive PMI are dropped from the subset.
            var notNeededWords = subset.WordsWithOccurences.Keys
                .Except(algorithm.TagDictionary
                    .Where(sd => sd.Value.PMI > 0)
                    .Select(sd => sd.Key))
                .ToList();

            // The dictionary is fetched before the subset is pruned; this order is kept from the
            // original implementation.
            var td = algorithm.GetDictionary();
            foreach (var word in notNeededWords)
            {
                subset.WordsWithOccurences.Remove(word);
            }

            // Serialization and progress reporting are done by one worker at a time.
            lock (lockObject)
            {
                // dictionary serialization
                var dicProtoBuf = new DictionaryProtoBuf
                {
                    Id = tag.Id,
                    Dictionary = td,
                    NGram = 1
                };
                dicProtoBuf.Serialize($"{directoryPath}/{dicProtoBuf.GetFileName()}");

                // subset serialization
                var subsetProtoBuf = new SubsetProtoBuf
                {
                    Id = tag.Id,
                    WordsWithOccurences = subset.WordsWithOccurences,
                    AllWordsOccurencesSumInTag = subset.AllWordsOccurencesSumInTag,
                    AllOccurencesSumInCorpus = subset.AllWordsOccurencesSumInCorpus
                };
                subsetProtoBuf.Serialize($"{directoryPath}/{subsetProtoBuf.GetFileName()}");

                progress.Step();
                processHandler.Changed(processId, progress.Percent.Round(2));
            }

            logger.LogInformation($"{logPrefix} prepared Tag: `{tag}`");
        });

        processHandler.Finished(
            processId,
            string.Format(ServiceResources.SuccessfullyPrepared_0_Service_1, ServiceTypeEnum.Prc, service.Name));
        service.Status = (int)ServiceStatusEnum.Prepared;
        serviceQuery.Update(service.Id, service);
    }
    catch (Exception ex)
    {
        // Roll back: the service returns to New and any partially written files are removed.
        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.New;
        serviceQuery.Update(service.Id, service);
        IOHelper.SafeDeleteDictionary(directoryPath, true);

        // Parallel.ForEach wraps exceptions thrown by the loop body in an AggregateException, but
        // it throws OperationCanceledException directly when the ParallelOptions carry the
        // cancelled token (what parallelService.ParallelOptions() sets is not visible here).
        // Accept both shapes so a cancellation is never reported as an interruption.
        var cancelled = ex is OperationCanceledException
            || ex.InnerException is OperationCanceledException;
        if (cancelled)
        {
            processHandler.Cancelled(processId);
        }
        else
        {
            processHandler.Interrupted(processId, ex);
        }
    }
}
/// <summary>
/// Activates a prepared Prc service: checks that enough memory is free, deserializes the
/// dictionary and subset files found in the service's directory in parallel, and registers
/// the result in <c>GlobalStore.ActivatedPrcs</c>.
/// </summary>
/// <param name="processId">Identifier passed to <c>processHandler</c> for progress, finish, cancel and interrupt notifications.</param>
/// <param name="settings">Supplies the service id; stored on the activated entry as its settings.</param>
/// <param name="token">Checked at the start of each file's work item; cancellation aborts the activation.</param>
/// <remarks>
/// Exceptions are not propagated to the caller. On any failure (including the
/// out-of-resource check) the service status is set back to <c>Prepared</c>, a registered
/// entry for the service is removed from <c>GlobalStore.ActivatedPrcs</c>, and
/// <c>processHandler</c> is notified as either cancelled or interrupted.
/// </remarks>
public void Activate(string processId, PrcSettingsElastic settings, CancellationToken token)
{
    try
    {
        GC.Collect();
        machineResourceService.UpdateResourcesManually();

        // Long literals force 64-bit arithmetic when FreeMemory is a 32-bit integer, so the
        // product cannot overflow for values of 2048 and above; other numeric types are unaffected.
        // NOTE(review): FreeMemory is presumably expressed in megabytes - confirm.
        var freeMemInBytes = machineResourceService.Status.FreeMemory * 1024L * 1024L;

        var directoryPath = $"{_dictionaryRootPath}/{settings.ServiceId}";
        var dictionaryPaths = IOHelper.GetFilesInFolder(directoryPath, DictionaryProtoBuf.GetExtension());
        var subsetPaths = new List<string>();
        subsetPaths.AddRange(IOHelper.GetFilesInFolder(directoryPath, SubsetProtoBuf.GetExtension()));

        var sizeInBytes = dictionaryPaths.Sum(f => new FileInfo(f).Length);
        sizeInBytes += subsetPaths.Sum(f => new FileInfo(f).Length);

        // A free-memory reading of zero or less skips the check instead of failing the activation.
        if (freeMemInBytes > 0 && freeMemInBytes < sizeInBytes * Constants.DictionaryInMemoryMultiplier)
        {
            throw new Common.Exceptions.OutOfResourceException(ServiceResources.NotEnoughResourceToActivateService);
        }

        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Busy;
        serviceQuery.Update(service.Id, service);

        var deserializedDics = new ConcurrentBag<DictionaryProtoBuf>();
        var deserializedSubsets = new ConcurrentBag<SubsetProtoBuf>();
        var lockObject = new object();

        var allCount = dictionaryPaths.Count + subsetPaths.Count;
        var progress = new Progress(allCount);

        var allPaths = new List<string>();
        allPaths.AddRange(dictionaryPaths);
        allPaths.AddRange(subsetPaths);

        // Used only for membership tests inside the parallel loop, to decide which type a path holds.
        var dicPathsDic = new ConcurrentDictionary<string, string>(dictionaryPaths.ToDictionary(p => p, p => p));
        var subsetPathsDic = new ConcurrentDictionary<string, string>(subsetPaths.ToDictionary(p => p, p => p));

        Parallel.ForEach(allPaths, parallelService.ParallelOptions(), (path, loopState) =>
        {
            token.ThrowIfCancellationRequested();

            if (dicPathsDic.ContainsKey(path))
            {
                deserializedDics.Add(BaseProtoBuf.DeSerialize<DictionaryProtoBuf>(path));
            }
            else if (subsetPathsDic.ContainsKey(path))
            {
                deserializedSubsets.Add(BaseProtoBuf.DeSerialize<SubsetProtoBuf>(path));
            }

            lock (lockObject)
            {
                progress.Step();
                // Progress is only published on every 15th step.
                if (progress.Value % 15 == 0)
                {
                    processHandler.Changed(processId, progress.Percent.Round(2));
                }
            }
        });

        var globalStorePrc = new GlobalStorePrc();

        if (deserializedDics.Any())
        {
            // One scorer per tag id, built from that tag's dictionaries keyed by n-gram.
            var scorersDic = deserializedDics
                .GroupBy(d => d.Id)
                .ToDictionary(
                    d => d.Key,
                    d => new Cerebellum.Scorer.PeSScorer(d.ToDictionary(di => di.NGram, di => di.Dictionary)));
            globalStorePrc.PrcScorers = scorersDic;
        }

        if (deserializedSubsets.Any())
        {
            var subsetsDic = deserializedSubsets.ToDictionary(
                d => d.Id,
                d => new Subset
                {
                    AllWordsOccurencesSumInCorpus = d.AllOccurencesSumInCorpus,
                    AllWordsOccurencesSumInTag = d.AllWordsOccurencesSumInTag,
                    WordsWithOccurences = d.WordsWithOccurences
                });
            globalStorePrc.PrcSubsets = subsetsDic;
        }

        globalStorePrc.PrcsSettings = settings;
        GlobalStore.ActivatedPrcs.Add(settings.ServiceId, globalStorePrc);

        processHandler.Finished(
            processId,
            string.Format(ServiceResources.SuccessfullyActivated_0_Service_1, ServiceTypeEnum.Prc, service.Name));
        service.Status = (int)ServiceStatusEnum.Active;
        serviceQuery.Update(service.Id, service);
    }
    catch (Exception ex)
    {
        // Roll back: the service returns to Prepared and a registered entry is removed.
        var service = serviceQuery.Get(settings.ServiceId);
        service.Status = (int)ServiceStatusEnum.Prepared;
        serviceQuery.Update(service.Id, service);

        if (GlobalStore.ActivatedPrcs.IsExist(settings.ServiceId))
        {
            GlobalStore.ActivatedPrcs.Remove(settings.ServiceId);
        }

        // Parallel.ForEach wraps exceptions thrown by the loop body in an AggregateException, but
        // it throws OperationCanceledException directly when the ParallelOptions carry the
        // cancelled token (what parallelService.ParallelOptions() sets is not visible here).
        // Accept both shapes so a cancellation is never reported as an interruption.
        var cancelled = ex is OperationCanceledException
            || ex.InnerException is OperationCanceledException;
        if (cancelled)
        {
            processHandler.Cancelled(processId);
        }
        else
        {
            processHandler.Interrupted(processId, ex);
        }

        GC.Collect();
    }
}