Пример #1
0
        /// <summary>
        /// Stores <paramref name="obj"/> under <paramref name="documentId"/> and adds the tokens of every
        /// non-empty <see cref="string"/> property carrying an <see cref="IndexingAttribute"/> to the
        /// inverted index, updating the document and term counters along the way.
        /// </summary>
        /// <typeparam name="TObj">Document type whose properties (as returned by <c>GetProperties()</c>) are inspected.</typeparam>
        /// <param name="documentId">Identifier the document is stored and indexed under.</param>
        /// <param name="obj">Document instance to store and index; must not be <c>null</c>.</param>
        /// <returns>The <paramref name="documentId"/> that was passed in.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="obj"/> is <c>null</c>.</exception>
        public int Indexing<TObj>(int documentId, TObj obj) where TObj : class
        {
            // Reject null before touching any state. Previously a null document was stored and
            // NumberOfDocuments incremented before the reflection read below failed, leaving the
            // counters out of step with the index.
            if (obj == null)
            {
                throw new System.ArgumentNullException("obj");
            }

            _documentStorage.Insert<TObj>(documentId, obj);
            NumberOfDocuments++;
            var numberOfTerms = 0;

            foreach (var member in typeof(TObj).GetProperties())
            {
                // A field id is requested for every property, whether or not it ends up indexed.
                var fieldName = member.FullName();
                var fieldId   = _fieldDictionary.GetOrCreate(fieldName);

                var isMarked = member.GetCustomAttributes(typeof(IndexingAttribute), true).Any();
                if (!isMarked || member.PropertyType != typeof(string))
                {
                    continue;
                }

                var text = member.GetValue(obj, null) as string;
                if (string.IsNullOrEmpty(text))
                {
                    continue;
                }

                foreach (var token in _tokinizer.GetTokens(text))
                {
                    numberOfTerms++;

                    // NOTE(review): ToLower() is culture-sensitive. Any query-time normalisation must
                    // use the same casing rule, so switch both sides together if moving to
                    // ToLowerInvariant().
                    var wordId   = _vocabulary.GetOrAddIndex(token.Term.ToLower());
                    var termInfo = new TermInformation()
                    {
                        TermIndex  = token.Index,
                        FieldId    = fieldId,
                        StartIndex = 0,
                        StopIndex  = 0
                    };
                    _invertedIndex.Insert(wordId, documentId, termInfo);
                }
            }

            NumberOfTerms += numberOfTerms;
            _documentNumberOfTerms[documentId] = numberOfTerms;
            return documentId;
        }
Пример #2
0
        /// <summary>
        /// Scores documents against <paramref name="query"/> across several fields. For every query
        /// token known to the vocabulary and every requested field, the per-document scores returned by
        /// the search engine are multiplied by the field's boost and summed per document. The result,
        /// ordered by descending score, is stored in <c>_queryDocumants</c>.
        /// </summary>
        /// <typeparam name="TObj">Document type the field expressions refer to.</typeparam>
        /// <param name="query">Query text; it is lower-cased before being tokenized.</param>
        /// <param name="fields">Fields to search, each with its own boost factor.</param>
        /// <returns>This filter instance.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="query"/> or <paramref name="fields"/> is <c>null</c>.
        /// </exception>
        public IFilter MultiMatch<TObj>(string query, IEnumerable<MatchField<TObj>> fields)
        {
            if (query == null)
            {
                throw new System.ArgumentNullException("query");
            }
            if (fields == null)
            {
                throw new System.ArgumentNullException("fields");
            }

            // Resolve every field name once up front. The sequence used to be re-enumerated, and each
            // name recomputed, for every single query term.
            var targets = fields
                .Select(f => new { Name = f.field.GetExpressionName(), Boost = f.Boost })
                .ToList();

            var resultContainer = new ConcurrentDictionary<int, double>();

            // Ids equal to -1 are dropped (presumably the vocabulary's "unknown term" marker - confirm).
            var termIds = _tokinizer.GetTokens(query.ToLower())
                .Select(token => _vocabulary.GetIndex(token.Term))
                .Where(id => id != -1);

            foreach (var termId in termIds)
            {
                foreach (var target in targets)
                {
                    foreach (var score in _searchEngine.SearchForTerm(termId, target.Name))
                    {
                        var weighted = score.Score * target.Boost;

                        // One accumulate step instead of GetOrAdd followed by a TryUpdate whose
                        // result was ignored.
                        resultContainer.AddOrUpdate(score.DocumentId, weighted, (key, current) => current + weighted);
                    }
                }
            }

            _queryDocumants = resultContainer
                .OrderByDescending(entry => entry.Value)
                .Select(entry => new DocumentScore()
                {
                    DocumentId = entry.Key,
                    Score      = entry.Value
                })
                .ToList();
            return this;
        }
Пример #3
0
        /// <summary>
        /// Tokenizes <paramref name="text"/> and inserts every token into the inverted index obtained
        /// for <paramref name="fieldName"/>, keyed by the vocabulary id of the lower-cased term.
        /// </summary>
        /// <param name="documentId">Identifier of the document the text belongs to.</param>
        /// <param name="text">Text to index; <c>null</c> or empty text indexes nothing.</param>
        /// <param name="fieldName">Name of the field whose indexer receives the tokens.</param>
        /// <returns>The number of tokens inserted (0 for <c>null</c> or empty text).</returns>
        private int IndexingText(int documentId, string text, string fieldName)
        {
            // The indexer is still resolved before the guard so that whatever GetIndexer does for
            // this field name happens exactly as it did before.
            var invertedIndex = _fieldIndex.GetIndexer(fieldName);

            // Nothing to tokenize: skip the tokenizer entirely rather than handing it null/empty text.
            if (string.IsNullOrEmpty(text))
            {
                return 0;
            }

            var numberOfTokens = 0;
            foreach (var token in _tokinizer.GetTokens(text))
            {
                // NOTE(review): ToLower() is culture-sensitive; query-time normalisation must use the
                // same casing rule.
                var wordId   = _vocabulary.GetOrAddIndex(token.Term.ToLower());
                var termInfo = new TermInformation()
                {
                    TermIndex  = token.Index,
                    StartIndex = 0,
                    StopIndex  = 0
                };
                invertedIndex.Insert(wordId, documentId, termInfo);
                numberOfTokens++;
            }
            return numberOfTokens;
        }
Пример #4
0
        /// <summary>
        /// Lazily splits <paramref name="corpus"/> into sentences. Tokens are collected until a token
        /// whose term is exactly ".", "?" or "!" is seen; the collected terms are then yielded as one
        /// array. The terminator tokens themselves are not included in any sentence.
        /// </summary>
        /// <param name="corpus">Text that is passed to the tokenizer.</param>
        /// <returns>
        /// One array of terms per non-empty sentence, in corpus order. Runs of terminators (for example
        /// "?!") and a terminator at the very start produce no empty arrays.
        /// </returns>
        public IEnumerable<string[]> GetSentenc(string corpus)
        {
            var sentenc = new List<string>();

            foreach (var token in _tokinizer.GetTokens(corpus))
            {
                var term         = token.Term;
                var endsSentence = term == "." || term == "?" || term == "!";

                if (!endsSentence)
                {
                    sentenc.Add(term);
                    continue;
                }

                // Only emit when something was collected. Back-to-back terminators used to yield
                // empty arrays even though the trailing check below already suppresses an empty
                // final sentence.
                if (sentenc.Count > 0)
                {
                    yield return sentenc.ToArray();

                    sentenc.Clear();
                }
            }

            // Text after the last terminator still forms a sentence.
            if (sentenc.Count > 0)
            {
                yield return sentenc.ToArray();
            }
        }