/// <summary>
/// Stores <paramref name="obj"/> under <paramref name="documentId"/> and adds the tokens of its
/// indexed string properties to the inverted index.
/// </summary>
/// <remarks>
/// A property is indexed when it carries an <see cref="IndexingAttribute"/>, is declared as
/// <see cref="string"/>, and currently holds a non-empty value. A field id is requested from the
/// field dictionary for every property returned by reflection, whether or not it is indexed.
/// Terms are lower-cased before being added to the vocabulary.
/// </remarks>
/// <typeparam name="TObj">Type whose properties are inspected via reflection.</typeparam>
/// <param name="documentId">Identifier the document is stored and indexed under.</param>
/// <param name="obj">Document instance to store and index.</param>
/// <returns>The <paramref name="documentId"/> that was passed in.</returns>
public int Indexing<TObj>(int documentId, TObj obj) where TObj : class
{
    _documentStorage.Insert<TObj>(documentId, obj);
    NumberOfDocuments++;

    var termCount = 0;
    foreach (var property in typeof(TObj).GetProperties())
    {
        // The field id is resolved before the attribute check, so every property gets one.
        var fieldId = _fieldDictionary.GetOrCreate(property.FullName());

        var isMarked = property.GetCustomAttributes(typeof(IndexingAttribute), true).Any();
        if (!isMarked || property.PropertyType != typeof(string))
        {
            continue;
        }

        var content = property.GetValue(obj, null) as string;
        if (string.IsNullOrEmpty(content))
        {
            continue;
        }

        foreach (var token in _tokinizer.GetTokens(content))
        {
            termCount++;

            var wordId = _vocabulary.GetOrAddIndex(token.Term.ToLower());
            var occurrence = new TermInformation()
            {
                TermIndex = token.Index,
                FieldId = fieldId,
                StartIndex = 0,
                StopIndex = 0
            };
            _invertedIndex.Insert(wordId, documentId, occurrence);
        }
    }

    // Update both the global term total and the per-document term count.
    NumberOfTerms += termCount;
    _documentNumberOfTerms[documentId] = termCount;

    return documentId;
}
/// <summary>
/// Scores documents against every term of <paramref name="query"/> across the given fields and
/// stores the ranked result on this filter.
/// </summary>
/// <remarks>
/// The query is lower-cased and tokenized; tokens for which the vocabulary returns -1 are ignored.
/// For each remaining term and each field, every score returned by the search engine is multiplied
/// by that field's boost and accumulated per document. The accumulated totals are then ordered by
/// descending score.
/// </remarks>
/// <typeparam name="TObj">Document type the field expressions refer to.</typeparam>
/// <param name="query">Free-text query to match.</param>
/// <param name="fields">Fields to search, each with its own boost factor.</param>
/// <returns>This instance, so that further calls can be chained.</returns>
public IFilter MultiMatch<TObj>(string query, IEnumerable<MatchField<TObj>> fields)
{
    var totals = new ConcurrentDictionary<int, double>();

    var termIds = _tokinizer.GetTokens(query.ToLower())
        .Select(token => _vocabulary.GetIndex(token.Term))
        .Where(id => id != -1);

    // Resolve each field's name once up front: the field list does not depend on the term, so
    // re-enumerating it and re-reading the expression for every term is wasted work.
    var targets = fields
        .Select(f => new { Name = f.field.GetExpressionName(), Boost = f.Boost })
        .ToList();

    foreach (var termId in termIds)
    {
        foreach (var target in targets)
        {
            foreach (var score in _searchEngine.SearchForTerm(termId, target.Name))
            {
                double weighted = score.Score * target.Boost;

                // Single add-or-accumulate step instead of GetOrAdd followed by a TryUpdate
                // whose failure would be silently ignored.
                totals.AddOrUpdate(score.DocumentId, weighted, (docId, running) => running + weighted);
            }
        }
    }

    _queryDocumants = totals
        .OrderByDescending(entry => entry.Value)
        .Select(entry => new DocumentScore() { DocumentId = entry.Key, Score = entry.Value })
        .ToList();

    return this;
}
/// <summary>
/// Tokenizes <paramref name="text"/> and records every token in the indexer obtained for
/// <paramref name="fieldName"/>.
/// </summary>
/// <param name="documentId">Identifier of the document the tokens belong to.</param>
/// <param name="text">Text to tokenize.</param>
/// <param name="fieldName">Name used to look up the per-field indexer.</param>
/// <returns>The number of tokens that were inserted.</returns>
private int IndexingText(int documentId, string text, string fieldName)
{
    var tokens = _tokinizer.GetTokens(text);
    var fieldIndexer = _fieldIndex.GetIndexer(fieldName);

    var inserted = 0;
    foreach (var token in tokens)
    {
        // Terms are lower-cased before they are looked up in (or added to) the vocabulary.
        var normalizedTerm = token.Term.ToLower();
        var wordId = _vocabulary.GetOrAddIndex(normalizedTerm);

        var occurrence = new TermInformation()
        {
            TermIndex = token.Index,
            StartIndex = 0,
            StopIndex = 0
        };

        fieldIndexer.Insert(wordId, documentId, occurrence);
        inserted += 1;
    }

    return inserted;
}
/// <summary>
/// Lazily splits <paramref name="corpus"/> into sentences, each returned as an array of token terms.
/// </summary>
/// <remarks>
/// A token whose term is ".", "?" or "!" ends the current sentence and is not included in it.
/// Empty sentences, as produced by leading or consecutive terminators such as "..." or "?!",
/// are skipped, matching the handling of the trailing sentence. Tokens that follow the last
/// terminator form a final sentence.
/// </remarks>
/// <param name="corpus">Text to tokenize and split.</param>
/// <returns>A sequence of non-empty sentences in the order they appear in the token stream.</returns>
public IEnumerable<string[]> GetSentenc(string corpus)
{
    var current = new List<string>();

    foreach (var token in _tokinizer.GetTokens(corpus))
    {
        var term = token.Term;
        var endsSentence = term == "." || term == "?" || term == "!";

        if (!endsSentence)
        {
            current.Add(term);
            continue;
        }

        // Only emit when something was collected; back-to-back terminators would otherwise
        // yield empty arrays.
        if (current.Count > 0)
        {
            yield return current.ToArray();
            current.Clear();
        }
    }

    if (current.Count > 0)
    {
        yield return current.ToArray();
    }
}