Ejemplo n.º 1
0
        /// <summary>
        /// Reads the index aliases and index properties, then converts every aliased index
        /// that also has a properties entry into a data set and hands it to
        /// <c>AddGlobalStoreInternal</c>. Indexes without a properties entry are skipped.
        /// </summary>
        internal void LoadGlobalStore()
        {
            var aliasesByIndex    = indexQuery.GetAliases();
            var propertiesByIndex = indexQuery.GetProperties(null);

            // Document counts are fetched in one call for all aliased indexes.
            var documentQuery  = queryFactory.GetDocumentQuery();
            var documentCounts = documentQuery.CountAll(aliasesByIndex.Keys.ToList());

            foreach (var entry in aliasesByIndex)
            {
                // The first alias name (if any) is used; it may be null.
                var aliasName = entry.Value.Select(a => a.Name).FirstOrDefault();
                var indexName = entry.Key;

                if (propertiesByIndex.ContainsKey(indexName))
                {
                    // Fall back to the index name when the index has no alias name.
                    var dataSetName     = aliasName ?? indexName;
                    var indexProperties = propertiesByIndex[indexName];
                    var documentCount   = (int)documentCounts[indexName];

                    var dataSet = Convert(dataSetName, indexProperties, documentCount);
                    AddGlobalStoreInternal(aliasName, indexName, dataSet);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a <see cref="Subset"/> from the word occurrences of the documents
        /// that the document query returns for the given tag.
        /// </summary>
        /// <param name="tagId">Tag id passed to the document query.</param>
        /// <param name="tagField">Name of the tag field passed to the document query.</param>
        /// <returns>
        /// A subset holding the corpus-wide occurrence sum, the sum of the per-word <c>Tag</c>
        /// counts, and the word/occurrence map itself.
        /// </returns>
        public Subset CreateByTag(string tagId, string tagField)
        {
            var documentQuery = queryFactory.GetDocumentQuery(_indexName);
            var wordQuery     = queryFactory.GetWordQuery(_indexName);

            // Only the id field is requested; the documents are used solely for their ids below.
            var requestedFields = DocumentQuery.GetDocumentElasticFields(new[] { DocumentElastic.IdField });
            var taggedDocuments = documentQuery.GetByTagId(tagId, tagField, requestedFields);

            // Attachment fields (matched case-insensitively) are addressed through their ".content" sub-field.
            var queryFields = _textFields
                              .Select(field => attachmentFields.Contains(field, StringComparer.OrdinalIgnoreCase)
                                  ? $"{field}.content"
                                  : field)
                              .ToList();

            var documentIds         = taggedDocuments.Select(d => d.Id).ToList();
            var wordsWithOccurences = wordQuery.GetWordsWithOccurences(documentIds, queryFields, _nGramCount);

            return new Subset
            {
                AllWordsOccurencesSumInCorpus = _allWordsOccurences,
                AllWordsOccurencesSumInTag    = wordsWithOccurences.Sum(w => w.Value.Tag),
                WordsWithOccurences           = wordsWithOccurences
            };
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Executes a search for the given search service and returns the hits, the autocomplete
        /// suggestions and, when classifier settings with a positive count are present,
        /// classifier recommendations for the request text and for every suggestion.
        /// </summary>
        /// <param name="id">Id or alias of the search service; an alias is translated to its id first.</param>
        /// <param name="request">
        /// Request body with the search text and optional autocomplete, classifier and search
        /// settings, which are passed to <c>MergeSettings</c> together with the service defaults.
        /// </param>
        /// <returns>
        /// The first non-null validation result, otherwise an <see cref="OkObjectResult"/>
        /// wrapping a <see cref="SearchResultWrapper"/>.
        /// </returns>
        public IActionResult Search(string id, [FromBody] SearchRequest request)
        {
            // If the id is an alias, translate it to the real service id.
            if (GlobalStore.ServiceAliases.IsExist(id))
            {
                id = GlobalStore.ServiceAliases.Get(id);
            }

            var validationResult = serviceManager.ValidateIfServiceActive(id, ServiceTypeEnum.Search);

            if (validationResult != null)
            {
                return validationResult;
            }

            var defaultSettings = GlobalStore.ActivatedSearches.Get(id).SearchSettingsWrapper;

            // Each optional settings section is validated only when the request supplies it.
            if (request.AutoCompleteSettings != null)
            {
                validationResult = Validate(request.AutoCompleteSettings);
                if (validationResult != null)
                {
                    return validationResult;
                }
            }
            if (request.ClassifierSettings != null)
            {
                validationResult = Validate(request.ClassifierSettings);
                if (validationResult != null)
                {
                    return validationResult;
                }
            }
            if (request.SearchSettings != null)
            {
                validationResult = Validate(defaultSettings.DataSetName, request.SearchSettings);
                if (validationResult != null)
                {
                    return validationResult;
                }
            }

            var searchSettings = MergeSettings(
                defaultSettings,
                request.AutoCompleteSettings,
                request.ClassifierSettings,
                request.SearchSettings);

            var dataSet = GlobalStore.DataSets.Get(searchSettings.DataSetName);
            var result  = new SearchResultWrapper();

            searchHandler.SaveSearchRequest(searchSettings, request);

            // Filter and weights are always taken from the service defaults, not from the merged settings.
            var documentQuery  = queryFactory.GetDocumentQuery(dataSet.DataSet.Name);
            var searchResponse = documentQuery.Search(
                searchSettings.AutoCompleteSettings,
                searchSettings.SearchSettings,
                request.Text,
                dataSet.DocumentFields,
                dataSet.DataSet.TagField,
                dataSet.DataSet.InterpretedFields,
                defaultSettings.SearchSettings.Filter,
                defaultSettings.SearchSettings.Weights);

            // AUTOCOMPLETE
            // The list stays null when the response carries no suggest section (null-conditional access).
            result.AutoCompleteResultList = searchResponse.Suggest?[DocumentQuery.SuggestName]
                                            .SelectMany(s => s.Options)
                                            .Where(o => o.CollateMatch)
                                            .Select(o => new AutoCompleteResult
                                            {
                                                Text  = o.Text,
                                                Score = o.Score,
                                            })
                                            .ToList();

            // SEARCH
            result.SearchResultList = searchResponse.Hits
                                      .Select(d => new SearchResult
                                      {
                                          Document   = d.Source.DocumentObject,
                                          DocumentId = d.Id,
                                          Score      = d.Score
                                      })
                                      .ToList();
            result.Total = (int)searchResponse.Total;

            // CLASSIFIER
            if (searchSettings.ClassifierSettings?.Count > 0)
            {
                // Distinct tag values found in the search hits; used to flag recommendations that match a hit.
                var searchMatchCategories = (dataSet.TagIsArray
                    ? result.SearchResultList.SelectMany(d => ((Array)DocumentHelper.GetValue(d.Document, dataSet.DataSet.TagField)).Cast<string>())
                    : result.SearchResultList.Select(d => DocumentHelper.GetValue(d.Document, dataSet.DataSet.TagField).ToString()))
                                            .Distinct()
                                            .ToDictionary(t => t);

                var classifierId = GlobalStore.ServiceAliases.IsExist(searchSettings.ClassifierSettings.Id)
                    ? GlobalStore.ServiceAliases.Get(searchSettings.ClassifierSettings.Id)
                    : searchSettings.ClassifierSettings.Id;
                var classifier = GlobalStore.ActivatedClassifiers.Get(classifierId);

                // The classifier may not be activated right now; in that case no recommendations are added.
                if (classifier != null)
                {
                    var recommendationCount = searchSettings.ClassifierSettings.Count;
                    var hasEmphasizedTags   = classifier.ClassifierEmphasizedTagIds.Any();

                    var resultsList = classifierHandler.Recommend(classifierId, request.Text, recommendationCount, hasEmphasizedTags, true);

                    result.ClassifierResultList = resultsList.Select(r => new SearchClassifierRecommendationResult
                    {
                        TagId             = r.TagId,
                        Score             = r.Score,
                        Tag               = r.Tag,
                        SearchResultMatch = searchMatchCategories.ContainsKey(r.TagId)
                    }).ToList();

                    // AUTOCOMPLETE
                    // Skip when there are no suggestions at all (the list is null without a suggest section).
                    if (result.AutoCompleteResultList != null)
                    {
                        foreach (var ac in result.AutoCompleteResultList)
                        {
                            // Recommendations are computed per suggestion text and must be projected
                            // from that suggestion's own result, not from the main request's result.
                            var acResultsList = classifierHandler.Recommend(classifierId, ac.Text, recommendationCount, hasEmphasizedTags, true);
                            ac.ClassifierResultList = acResultsList.Select(r => new SearchClassifierRecommendationResult
                            {
                                TagId             = r.TagId,
                                Score             = r.Score,
                                Tag               = r.Tag,
                                SearchResultMatch = searchMatchCategories.ContainsKey(r.TagId)
                            }).ToList();
                        }
                    }
                }
            }

            return new OkObjectResult(result);
        }
Ejemplo n.º 4
0
 // Returns the document query that queryFactory provides for the given data set name.
 IDocumentQuery DocumentQuery(string dataSetName)
 {
     return queryFactory.GetDocumentQuery(dataSetName);
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Returns the document query that <c>queryFactory</c> provides for the given data set name.
 /// </summary>
 /// <param name="dataSetName">Name of the data set the query should target.</param>
 private IDocumentQuery DocumentQuery(string dataSetName)
 {
     var documentQuery = queryFactory.GetDocumentQuery(dataSetName);
     return documentQuery;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Recommends documents for the request text using an activated PRC service.
        /// The text is analyzed into tokens, a tag is taken from the request or chosen by the
        /// highest-scoring PRC scorer, a boosted query is built from the tokens found in that tag's
        /// dictionary, and the returned documents are re-scored relative to the request text.
        /// </summary>
        /// <param name="id">Id or alias of the PRC service; an alias is translated to its id first.</param>
        /// <param name="request">Request body: text, optional tag id, filter, weights, count and whether documents are needed in the result.</param>
        /// <returns>
        /// 400 when the request is null, the service is not activated, or a given tag id is unknown;
        /// 406 when the selected tag has no words; otherwise an <see cref="OkObjectResult"/> with a
        /// list of <see cref="PrcRecommendationResult"/> ordered by descending score (possibly empty).
        /// </returns>
        public IActionResult Recommend(string id, [FromBody] PrcRecommendationRequest request)
        {
            if (request == null)
            {
                return new StatusCodeResult(StatusCodes.Status400BadRequest);
            }

            // If Id is Alias, translate to Id
            if (GlobalStore.ServiceAliases.IsExist(id))
            {
                id = GlobalStore.ServiceAliases.Get(id);
            }

            if (!GlobalStore.ActivatedPrcs.IsExist(id))
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, string.Format(ServiceResources.ServiceNotExistsOrNotActivated, ServiceTypeEnum.Prc));
            }

            // Resolve the service once so the whole request works on one consistent instance
            // instead of re-reading the global store at every step.
            var prc = GlobalStore.ActivatedPrcs.Get(id);
            if (prc == null)
            {
                // Guards against the service disappearing between the existence check and the lookup.
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, string.Format(ServiceResources.ServiceNotExistsOrNotActivated, ServiceTypeEnum.Prc));
            }

            if (!string.IsNullOrEmpty(request.TagId) && !prc.PrcsSettings.Tags.Any(t => t.Id == request.TagId))
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest, ServiceResources.TheGivenTagIsMissingFromThePRCService);
            }

            var globalStoreDataSet = GlobalStore.DataSets.Get(prc.PrcsSettings.DataSetName);
            var dataSet            = globalStoreDataSet.DataSet;
            var analyzeQuery       = queryFactory.GetAnalyzeQuery(dataSet.Name);

            var tokens = analyzeQuery.Analyze(request.Text, 1).ToList();
            var text   = string.Join(" ", tokens);

            // Determine the tagId
            var tagId = string.Empty;

            if (!string.IsNullOrEmpty(request.TagId))
            {
                tagId = request.TagId;
            }
            else
            {
                // If no tagId is given, pick the tag whose PRC scorer gives the highest positive score
                var allResults = new List<KeyValuePair<string, double>>();
                foreach (var scorerKvp in prc.PrcScorers)
                {
                    var score = scorerKvp.Value.GetScore(text, 1.7, true);
                    allResults.Add(new KeyValuePair<string, double>(scorerKvp.Key, score));
                }
                var resultsList = allResults.Where(r => r.Value > 0).OrderByDescending(r => r.Value).ToList();
                if (resultsList.Count == 0)
                {
                    return new OkObjectResult(new List<PrcRecommendationResult>());
                }
                tagId = resultsList.First().Key;
            }

            var tagsToTest = new List<string>();

            if (request.Filter?.TagIdList?.Any() == true)
            {
                // Decide from the actual missing ids rather than by comparing counts: a count
                // comparison wrongly rejects a filter that merely repeats a valid tag id.
                var knownTagIds   = prc.PrcsSettings.Tags.Select(t => t.Id);
                var missingTagIds = request.Filter.TagIdList.Except(knownTagIds).ToList();
                if (missingTagIds.Count > 0)
                {
                    return new HttpStatusCodeWithErrorResult(StatusCodes.Status400BadRequest,
                                                             string.Format(ServiceResources.TheFollowingTagIdsNotExistInTheDataSet_0, string.Join(", ", missingTagIds)));
                }
                tagsToTest = request.Filter.TagIdList;
            }

            var globalSubset = prc.PrcSubsets[tagId];

            if (globalSubset.WordsWithOccurences == null)
            {
                return new HttpStatusCodeWithErrorResult(StatusCodes.Status406NotAcceptable, ServiceResources.TheGivenTagHasNoWordsInDictionary);
            }

            // Restrict the tag's word statistics to the words that occur in the request text.
            var wordsInDic = globalSubset.WordsWithOccurences.Keys.Intersect(tokens).ToList();

            var baseSubset = new Cerebellum.Subset
            {
                AllWordsOccurencesSumInCorpus = globalSubset.AllWordsOccurencesSumInCorpus,
                AllWordsOccurencesSumInTag    = globalSubset.AllWordsOccurencesSumInTag,
                WordsWithOccurences           = wordsInDic.ToDictionary(w => w, w => globalSubset.WordsWithOccurences[w])
            };
            var baseDic = new Cerebellum.Dictionary.TwisterAlgorithm(baseSubset, true, false).GetDictionary();

            var globalScorer = prc.PrcScorers[tagId];
            var baseScorer   = new Cerebellum.Scorer.PeSScorer(new Dictionary<int, Dictionary<string, double>> {
                { 1, baseDic }
            });

            var baseScore   = baseScorer.GetScore(text, 1.7);
            var globalScore = globalScorer.GetScore(text, 1.7);

            var results = new List<PrcRecommendationResult>();

            // Both scores are used as divisors below, so a zero score means nothing can be recommended.
            if (baseScore == 0 || globalScore == 0)
            {
                return new OkObjectResult(results);
            }

            var filterQuery = request.Filter?.Query?.Trim();
            var query       = string.IsNullOrEmpty(filterQuery) ? string.Empty : $"({filterQuery}) AND ";

            // '+ 1' because we give score between 0 and 1 but in elasticsearch that means negative boost.
            // Boosts are formatted with the invariant culture so the decimal separator is always '.',
            // regardless of the culture the request thread runs under.
            var boostedTerms = baseDic.Select(k => string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0}^{1}", k.Key, k.Value + 1));
            query = string.Format("{0}({1})", query, string.Join(" ", boostedTerms));

            string shouldQuery = null;

            // weighting
            if (request.Weights?.Any() == true)
            {
                shouldQuery = string.Join(" ", request.Weights.Select(k =>
                                                                      string.Format(System.Globalization.CultureInfo.InvariantCulture, "({0})^{1}", k.Query, k.Value)));
            }

            var fieldsForRecommendation = prc.PrcsSettings.FieldsForRecommendation;

            var documentQuery    = queryFactory.GetDocumentQuery(dataSet.Name);
            var documentElastics = new List<DocumentElastic>();
            var scrollResult     = documentQuery
                                   .Filter(query,
                                           tagsToTest,
                                           dataSet.TagField,
                                           request.Count,
                                           null, false,
                                           fieldsForRecommendation,
                                           globalStoreDataSet.DocumentFields,
                                           DocumentService.GetFieldFilter(globalStoreDataSet, new List<string> {
                request.NeedDocumentInResult ? "*" : globalStoreDataSet.DataSet.IdField
            }),
                                           null, null, null,
                                           shouldQuery);

            documentElastics.AddRange(scrollResult.Items);

            // Filled concurrently by the parallel loop below.
            var docIdsWithScore = new ConcurrentDictionary<string, double>();
            var wordQuery       = queryFactory.GetWordQuery(dataSet.Name);

            Func<string, bool> isAttachmentField = (field) => globalStoreDataSet.AttachmentFields.Any(attachmentField =>
                                                                                                      string.Equals(attachmentField, field, StringComparison.OrdinalIgnoreCase));

            Parallel.ForEach(documentElastics, parallelService.ParallelOptions(), docElastic =>
            {
                // Attachment fields are addressed through their ".content" sub-field.
                var fieldList = fieldsForRecommendation
                                .Select(field => isAttachmentField(field) ? $"{field}.content" : field)
                                .Select(DocumentQuery.MapDocumentObjectName)
                                .ToList();

                var wwo = wordQuery.GetWordsWithOccurences(new List<string> {
                    docElastic.Id
                }, fieldList, 1);

                // Rebuild a text for the document by repeating every word as many times as its Tag count.
                var actualCleanedText = string.Join(" ", wwo.Select(w => string.Join(" ", Enumerable.Repeat(w.Key, w.Value.Tag))));

                var actualBaseScore = baseScorer.GetScore(actualCleanedText, 1.7);
                if (actualBaseScore == 0)
                {
                    return;
                }

                var actualGlobalScore = globalScorer.GetScore(actualCleanedText, 1.7);
                if (actualGlobalScore == 0)
                {
                    return;
                }

                // Ratio of the document's base score to its global score, each normalized by the request text's score.
                var finalScore = (actualBaseScore / baseScore) / (actualGlobalScore / globalScore);
                docIdsWithScore.TryAdd(docElastic.Id, finalScore);
            });

            var resultDic = docIdsWithScore.OrderByDescending(rd => rd.Value).ToList();

            // A Count of 0 means the result is not truncated.
            if (request.Count != 0 && resultDic.Count > request.Count)
            {
                resultDic = resultDic.Take(request.Count).ToList();
            }

            var docsDic = request.NeedDocumentInResult
                ? resultDic.Select(r => documentElastics.First(d => d.Id == r.Key)).ToDictionary(d => d.Id, d => d)
                : null;

            // Materialize the list here so the projection runs now rather than lazily
            // when the result object is later enumerated.
            results = resultDic.Select(kvp => new PrcRecommendationResult
            {
                DocumentId = kvp.Key,
                Score      = kvp.Value,
                Document   = request.NeedDocumentInResult ? docsDic[kvp.Key].DocumentObject : null
            }).ToList();

            return new OkObjectResult(results);
        }