/// <summary>
/// Registers a global-store entry for every index returned by the alias query
/// that also has an entry in the property lookup.
/// </summary>
/// <remarks>
/// The first alias name of an index (if any) is used as the entry name; the
/// converted data set falls back to the index name when there is no alias.
/// Indexes without properties are skipped.
/// </remarks>
internal void LoadGlobalStore()
{
    var aliasesByIndex = indexQuery.GetAliases();
    var propertiesByIndex = indexQuery.GetProperties(null);
    var documentCounts = queryFactory.GetDocumentQuery().CountAll(aliasesByIndex.Keys.ToList());

    foreach (var entry in aliasesByIndex)
    {
        var aliasName = entry.Value.Select(alias => alias.Name).FirstOrDefault();
        var indexName = entry.Key;

        if (propertiesByIndex.ContainsKey(indexName))
        {
            // The data set itself always gets a name; the store entry keeps the (possibly null) alias.
            var dataSetName = aliasName ?? indexName;
            var dataSet = Convert(dataSetName, propertiesByIndex[indexName], (int)documentCounts[indexName]);
            AddGlobalStoreInternal(aliasName, indexName, dataSet);
        }
    }
}
/// <summary>
/// Builds the word-occurrence subset for the documents returned by the tag lookup.
/// </summary>
/// <param name="tagId">Tag identifier handed to the document query.</param>
/// <param name="tagField">Field name handed to the document query together with the tag id.</param>
/// <returns>
/// A <see cref="Subset"/> carrying the corpus-wide occurrence total, the occurrence
/// total of the tag, and the per-word occurrences.
/// </returns>
public Subset CreateByTag(string tagId, string tagField)
{
    var documentQuery = queryFactory.GetDocumentQuery(_indexName);
    var wordQuery = queryFactory.GetWordQuery(_indexName);

    // Only the id field is requested for the tagged documents.
    var requestedFields = DocumentQuery.GetDocumentElasticFields(new[] { DocumentElastic.IdField });
    var taggedDocuments = documentQuery.GetByTagId(tagId, tagField, requestedFields);

    // Attachment fields are addressed through their ".content" sub-field.
    var queryFields = _textFields
        .Select(textField =>
            attachmentFields.Any(attachmentField => string.Equals(attachmentField, textField, StringComparison.OrdinalIgnoreCase))
                ? $"{textField}.content"
                : textField)
        .ToList();

    var documentIds = taggedDocuments.Select(document => document.Id).ToList();
    var wordOccurrences = wordQuery.GetWordsWithOccurences(documentIds, queryFields, _nGramCount);

    return new Subset
    {
        AllWordsOccurencesSumInCorpus = _allWordsOccurences,
        AllWordsOccurencesSumInTag = wordOccurrences.Sum(word => word.Value.Tag),
        WordsWithOccurences = wordOccurrences
    };
}
/// <summary>
/// Executes a search against the data set of an activated search service and,
/// when classifier settings are present, attaches classifier recommendations to
/// the request text and to every autocomplete suggestion.
/// </summary>
/// <param name="id">Service id or service alias; an alias is translated to the id first.</param>
/// <param name="request">Search text plus optional per-request settings that are merged over the service defaults.</param>
/// <returns>
/// The non-null result of the first failing validation, otherwise an
/// <see cref="OkObjectResult"/> wrapping a <see cref="SearchResultWrapper"/>.
/// </returns>
public IActionResult Search(string id, [FromBody] SearchRequest request)
{
    // If the id is an alias, translate it to the real service id.
    if (GlobalStore.ServiceAliases.IsExist(id))
    {
        id = GlobalStore.ServiceAliases.Get(id);
    }

    var validationResult = serviceManager.ValidateIfServiceActive(id, ServiceTypeEnum.Search);
    if (validationResult != null)
    {
        return validationResult;
    }

    var defaultSettings = GlobalStore.ActivatedSearches.Get(id).SearchSettingsWrapper;

    // Each optional settings section is validated only when the request supplies it.
    if (request.AutoCompleteSettings != null)
    {
        validationResult = Validate(request.AutoCompleteSettings);
        if (validationResult != null)
        {
            return validationResult;
        }
    }

    if (request.ClassifierSettings != null)
    {
        validationResult = Validate(request.ClassifierSettings);
        if (validationResult != null)
        {
            return validationResult;
        }
    }

    if (request.SearchSettings != null)
    {
        validationResult = Validate(defaultSettings.DataSetName, request.SearchSettings);
        if (validationResult != null)
        {
            return validationResult;
        }
    }

    var searchSettings = MergeSettings(
        defaultSettings,
        request.AutoCompleteSettings,
        request.ClassifierSettings,
        request.SearchSettings);

    var dataSet = GlobalStore.DataSets.Get(searchSettings.DataSetName);
    var result = new SearchResultWrapper();

    searchHandler.SaveSearchRequest(searchSettings, request);

    var documentQuery = queryFactory.GetDocumentQuery(dataSet.DataSet.Name);
    var searchResponse = documentQuery.Search(
        searchSettings.AutoCompleteSettings,
        searchSettings.SearchSettings,
        request.Text,
        dataSet.DocumentFields,
        dataSet.DataSet.TagField,
        dataSet.DataSet.InterpretedFields,
        defaultSettings.SearchSettings.Filter,
        defaultSettings.SearchSettings.Weights);

    // AUTOCOMPLETE
    // The null-conditional indexer short-circuits the whole chain, so the list
    // stays null when the response carries no suggest section.
    result.AutoCompleteResultList = searchResponse.Suggest?[DocumentQuery.SuggestName]
        .SelectMany(s => s.Options)
        .Where(o => o.CollateMatch)
        .Select(o => new AutoCompleteResult
        {
            Text = o.Text,
            Score = o.Score,
        })
        .ToList();

    // SEARCH
    result.SearchResultList = searchResponse.Hits
        .Select(d => new SearchResult
        {
            Document = d.Source.DocumentObject,
            DocumentId = d.Id,
            Score = d.Score
        })
        .ToList();
    result.Total = (int)searchResponse.Total;

    // CLASSIFIER
    if (searchSettings.ClassifierSettings?.Count > 0)
    {
        // Distinct tag values found in the search hits; used to flag recommendations
        // that also occur in the search result.
        var searchMatchCategories = (dataSet.TagIsArray
                ? result.SearchResultList.SelectMany(d => ((Array)DocumentHelper.GetValue(d.Document, dataSet.DataSet.TagField)).Cast<string>())
                : result.SearchResultList.Select(d => DocumentHelper.GetValue(d.Document, dataSet.DataSet.TagField).ToString()))
            .Distinct()
            .ToDictionary(t => t);

        var classifierId = GlobalStore.ServiceAliases.IsExist(searchSettings.ClassifierSettings.Id)
            ? GlobalStore.ServiceAliases.Get(searchSettings.ClassifierSettings.Id)
            : searchSettings.ClassifierSettings.Id;
        var classifier = GlobalStore.ActivatedClassifiers.Get(classifierId);

        // Skip recommendations when the classifier is not activated right now.
        if (classifier != null)
        {
            // Loop-invariant: evaluated once instead of once per suggestion.
            var hasEmphasizedTags = classifier.ClassifierEmphasizedTagIds.Any();

            var resultsList = classifierHandler.Recommend(
                classifierId,
                request.Text,
                searchSettings.ClassifierSettings.Count,
                hasEmphasizedTags,
                true);
            result.ClassifierResultList = resultsList
                .Select(r => new SearchClassifierRecommendationResult
                {
                    TagId = r.TagId,
                    Score = r.Score,
                    Tag = r.Tag,
                    SearchResultMatch = searchMatchCategories.ContainsKey(r.TagId)
                })
                .ToList();

            // AUTOCOMPLETE
            // Guarded because the list is null when the response had no suggestions section.
            if (result.AutoCompleteResultList != null)
            {
                foreach (var ac in result.AutoCompleteResultList)
                {
                    var acResultsList = classifierHandler.Recommend(
                        classifierId,
                        ac.Text,
                        searchSettings.ClassifierSettings.Count,
                        hasEmphasizedTags,
                        true);

                    // Map the recommendations computed for this suggestion's own text
                    // (previously the list for the original request text was reused).
                    ac.ClassifierResultList = acResultsList
                        .Select(r => new SearchClassifierRecommendationResult
                        {
                            TagId = r.TagId,
                            Score = r.Score,
                            Tag = r.Tag,
                            SearchResultMatch = searchMatchCategories.ContainsKey(r.TagId)
                        })
                        .ToList();
                }
            }
        }
    }

    return new OkObjectResult(result);
}
// Returns the document query that the query factory provides for the given data set name.
IDocumentQuery DocumentQuery(string dataSetName)
{
    return queryFactory.GetDocumentQuery(dataSetName);
}
/// <summary>
/// Returns the document query that the query factory provides for the given data set.
/// </summary>
/// <param name="dataSetName">Name of the data set passed to the factory.</param>
private IDocumentQuery DocumentQuery(string dataSetName) => queryFactory.GetDocumentQuery(dataSetName);
/// <summary>
/// Recommends documents for the request text using an activated PRC service.
/// </summary>
/// <remarks>
/// The request text is analyzed into tokens, a tag is taken from the request or
/// chosen by the highest positive scorer result, a boosted query is built from the
/// tag dictionary restricted to the request tokens, and every returned document is
/// re-scored as (base score ratio) / (global score ratio).
/// </remarks>
/// <param name="id">Service id or service alias; an alias is translated to the id first.</param>
/// <param name="request">Text, optional tag id, optional filter and weights, result count.</param>
/// <returns>
/// 400 when the request is null, the service is not activated, the tag is unknown to
/// the service, or a filter tag id is unknown; 406 when the tag has no word
/// dictionary; otherwise 200 with a list of <see cref="PrcRecommendationResult"/>
/// ordered by descending score (empty when nothing scores above zero).
/// </returns>
public IActionResult Recommend(string id, [FromBody] PrcRecommendationRequest request)
{
    // The same literal was passed to every GetScore call; its meaning is defined by the scorer.
    const double scorerParameter = 1.7;

    if (request == null)
    {
        return new StatusCodeResult(StatusCodes.Status400BadRequest);
    }

    // If the id is an alias, translate it to the real service id.
    if (GlobalStore.ServiceAliases.IsExist(id))
    {
        id = GlobalStore.ServiceAliases.Get(id);
    }

    if (!GlobalStore.ActivatedPrcs.IsExist(id))
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status400BadRequest,
            string.Format(ServiceResources.ServiceNotExistsOrNotActivated, ServiceTypeEnum.Prc));
    }

    // Read the activated service once so the whole request works on one consistent instance.
    var prc = GlobalStore.ActivatedPrcs.Get(id);
    var prcSettings = prc.PrcsSettings;

    if (!string.IsNullOrEmpty(request.TagId) && !prcSettings.Tags.Any(t => t.Id == request.TagId))
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status400BadRequest,
            ServiceResources.TheGivenTagIsMissingFromThePRCService);
    }

    var globalStoreDataSet = GlobalStore.DataSets.Get(prcSettings.DataSetName);
    var dataSet = globalStoreDataSet.DataSet;
    var analyzeQuery = queryFactory.GetAnalyzeQuery(dataSet.Name);

    var tokens = analyzeQuery.Analyze(request.Text, 1).ToList();
    var text = string.Join(" ", tokens);

    // Determine the tag id.
    string tagId;
    if (!string.IsNullOrEmpty(request.TagId))
    {
        tagId = request.TagId;
    }
    else
    {
        // No tag id was given, so compute it with the PRC scorers: the best positive score wins.
        var allResults = new List<KeyValuePair<string, double>>();
        foreach (var scorerKvp in prc.PrcScorers)
        {
            var score = scorerKvp.Value.GetScore(text, scorerParameter, true);
            allResults.Add(new KeyValuePair<string, double>(scorerKvp.Key, score));
        }

        var resultsList = allResults.Where(r => r.Value > 0).OrderByDescending(r => r.Value).ToList();
        if (resultsList.Count == 0)
        {
            return new OkObjectResult(new List<PrcRecommendationResult>());
        }

        tagId = resultsList[0].Key;
    }

    var tagsToTest = new List<string>();
    if (request.Filter?.TagIdList?.Any() == true)
    {
        // Reject only ids that are really unknown. Comparing counts (as before) also
        // rejected a list that merely repeated a valid id, with an empty "missing" list.
        var knownTagIds = prcSettings.Tags.Select(t => t.Id);
        var missingTagIds = request.Filter.TagIdList.Except(knownTagIds).ToList();
        if (missingTagIds.Count > 0)
        {
            return new HttpStatusCodeWithErrorResult(
                StatusCodes.Status400BadRequest,
                string.Format(ServiceResources.TheFollowingTagIdsNotExistInTheDataSet_0, string.Join(", ", missingTagIds)));
        }

        tagsToTest = request.Filter.TagIdList;
    }

    var globalSubset = prc.PrcSubsets[tagId];
    if (globalSubset.WordsWithOccurences == null)
    {
        return new HttpStatusCodeWithErrorResult(
            StatusCodes.Status406NotAcceptable,
            ServiceResources.TheGivenTagHasNoWordsInDictionary);
    }

    // Restrict the tag's word statistics to the words that occur in the request text.
    var wordsInDic = globalSubset.WordsWithOccurences.Keys.Intersect(tokens).ToList();
    var baseSubset = new Cerebellum.Subset
    {
        AllWordsOccurencesSumInCorpus = globalSubset.AllWordsOccurencesSumInCorpus,
        AllWordsOccurencesSumInTag = globalSubset.AllWordsOccurencesSumInTag,
        WordsWithOccurences = wordsInDic.ToDictionary(w => w, w => globalSubset.WordsWithOccurences[w])
    };
    var baseDic = new Cerebellum.Dictionary.TwisterAlgorithm(baseSubset, true, false).GetDictionary();

    var globalScorer = prc.PrcScorers[tagId];
    var baseScorer = new Cerebellum.Scorer.PeSScorer(new Dictionary<int, Dictionary<string, double>> { { 1, baseDic } });

    var baseScore = baseScorer.GetScore(text, scorerParameter);
    var globalScore = globalScorer.GetScore(text, scorerParameter);

    if (baseScore == 0 || globalScore == 0)
    {
        return new OkObjectResult(new List<PrcRecommendationResult>());
    }

    var filterQuery = request.Filter?.Query?.Trim();
    var filterPrefix = string.IsNullOrEmpty(filterQuery) ? string.Empty : $"({filterQuery}) AND ";

    // '+ 1' because we give score between 0 and 1 but in elasticsearch that means negative boost.
    // Boosts are formatted with the invariant culture: under a comma-decimal culture
    // the query would otherwise contain "term^1,5".
    var boostedTerms = string.Join(
        " ",
        baseDic.Select(k => System.FormattableString.Invariant($"{k.Key}^{k.Value + 1}")));
    var query = $"{filterPrefix}({boostedTerms})";

    // Weighting
    string shouldQuery = null;
    if (request.Weights?.Any() == true)
    {
        shouldQuery = string.Join(
            " ",
            request.Weights.Select(k => System.FormattableString.Invariant($"({k.Query})^{k.Value}")));
    }

    var fieldsForRecommendation = prcSettings.FieldsForRecommendation;
    var documentQuery = queryFactory.GetDocumentQuery(dataSet.Name);

    var scrollResult = documentQuery.Filter(
        query,
        tagsToTest,
        dataSet.TagField,
        request.Count,
        null,
        false,
        fieldsForRecommendation,
        globalStoreDataSet.DocumentFields,
        DocumentService.GetFieldFilter(
            globalStoreDataSet,
            new List<string> { request.NeedDocumentInResult ? "*" : globalStoreDataSet.DataSet.IdField }),
        null,
        null,
        null,
        shouldQuery);
    var documentElastics = new List<DocumentElastic>(scrollResult.Items);

    var docIdsWithScore = new ConcurrentDictionary<string, double>();
    var wordQuery = queryFactory.GetWordQuery(dataSet.Name);

    Func<string, bool> isAttachmentField = field =>
        globalStoreDataSet.AttachmentFields.Any(attachmentField => string.Equals(attachmentField, field, StringComparison.OrdinalIgnoreCase));

    Parallel.ForEach(documentElastics, parallelService.ParallelOptions(), docElastic =>
    {
        // Attachment fields are addressed through their ".content" sub-field.
        var fieldList = fieldsForRecommendation
            .Select(field => isAttachmentField(field) ? $"{field}.content" : field)
            .Select(DocumentQuery.MapDocumentObjectName)
            .ToList();

        // Rebuild a text for the document by repeating each word as often as it occurs.
        var wwo = wordQuery.GetWordsWithOccurences(new List<string> { docElastic.Id }, fieldList, 1);
        var actualCleanedText = string.Join(" ", wwo.Select(w => string.Join(" ", Enumerable.Repeat(w.Key, w.Value.Tag))));

        var actualBaseScore = baseScorer.GetScore(actualCleanedText, scorerParameter);
        if (actualBaseScore == 0)
        {
            return;
        }

        var actualGlobalScore = globalScorer.GetScore(actualCleanedText, scorerParameter);
        if (actualGlobalScore == 0)
        {
            return;
        }

        var finalScore = (actualBaseScore / baseScore) / (actualGlobalScore / globalScore);
        docIdsWithScore.TryAdd(docElastic.Id, finalScore);
    });

    var rankedResults = docIdsWithScore.OrderByDescending(rd => rd.Value).ToList();
    if (request.Count != 0 && rankedResults.Count > request.Count)
    {
        rankedResults = rankedResults.Take(request.Count).ToList();
    }

    // One pass to index the documents by id (first occurrence wins) instead of a
    // linear First(...) search per result.
    Dictionary<string, DocumentElastic> docsById = null;
    if (request.NeedDocumentInResult)
    {
        docsById = new Dictionary<string, DocumentElastic>();
        foreach (var document in documentElastics)
        {
            if (document.Id != null && !docsById.ContainsKey(document.Id))
            {
                docsById.Add(document.Id, document);
            }
        }
    }

    // Materialized so the response no longer depends on deferred evaluation during serialization.
    var recommendations = rankedResults
        .Select(kvp => new PrcRecommendationResult
        {
            DocumentId = kvp.Key,
            Score = kvp.Value,
            Document = request.NeedDocumentInResult ? docsById[kvp.Key].DocumentObject : null
        })
        .ToList();

    return new OkObjectResult(recommendations);
}