/// <summary>
/// Adds every word of the given document to <c>_words</c>, one word at a time,
/// in the order the document yields them.
/// </summary>
/// <param name="document">document whose words are added</param>
public void AddDocument(Document document)
{
    foreach (var term in document.Words)
    {
        _words.Add(term);
    }
}
/// <summary>
/// Builds a new document that keeps only those words of <paramref name="document"/>
/// for which <c>_termsCollection.ContainsTerm</c> returns true. The relative order of
/// the kept words is preserved and repeated words are kept as often as they occur.
/// </summary>
/// <param name="document">document to filter</param>
/// <returns>a new document made of the accepted words only</returns>
public Document PureDocument(Document document)
{
    var knownTerms = new List<string>();

    foreach (var candidate in document.Words)
    {
        // Skip anything the terms collection does not recognise.
        if (!_termsCollection.ContainsTerm(candidate))
        {
            continue;
        }

        knownTerms.Add(candidate);
    }

    return new Document(knownTerms);
}
/// <summary>
/// Stems a document, creating a new document in which every word has been replaced by
/// the result of <c>stemmerInterface.StemTerm</c>. The input document is not modified
/// by this method.
/// </summary>
/// <param name="document">document to stem</param>
/// <param name="stemmerInterface">stemmer interface to use</param>
/// <returns>document, with all words stemmed.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="stemmerInterface"/> is null.
/// </exception>
public static Document StemDocument(Document document, IStemmerInterface stemmerInterface)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from inside the loop (and only when the document happens to have words).
    if (stemmerInterface == null)
    {
        throw new ArgumentNullException("stemmerInterface");
    }

    var words = new List<string>();
    foreach (string word in document.Words)
    {
        words.Add(stemmerInterface.StemTerm(word));
    }

    return new Document(words);
}
/// <summary>
/// Creates a bag of words from a document by counting, in <c>_wordCount</c>, how many
/// times each word occurs in <c>document.Words</c>.
/// </summary>
/// <param name="document">document whose words are counted</param>
public BagOfWords(Document document)
{
    foreach (var term in document.Words)
    {
        // First occurrence: start the counter at one.
        if (!_wordCount.ContainsKey(term))
        {
            _wordCount[term] = 1;
            continue;
        }

        // Seen before: bump the existing counter.
        _wordCount[term]++;
    }
}
/// <summary>
/// Scores the query against every entry of <c>_tfIdfWithDocuments</c> and returns the
/// results ordered from highest to lowest probability.
/// </summary>
/// <param name="query">query document to search for</param>
/// <returns>
/// one <see cref="SearchResult"/> per stored entry, sorted by descending
/// <c>Probability</c>
/// </returns>
public IEnumerable<SearchResult> Search(Document query)
{
    // Build the TF-IDF representation of the query using the shared _idf.
    var queryBag = new BagOfWords(query);
    var queryTf = new Tf(queryBag);
    var queryVector = new TfIdf(queryTf, _idf);

    var scored = new List<SearchResult>();
    foreach (TfIdfWithDocument candidate in _tfIdfWithDocuments)
    {
        double score = _probabilityMatrixCalculator.CalculateProbability(
            queryVector,
            candidate.TfIdf,
            _termsCollection);

        scored.Add(new SearchResult(candidate.Document, score));
    }

    return scored.OrderByDescending(result => result.Probability);
}
/// <summary>
/// Pairs a TF-IDF value with a document by storing both arguments in the
/// <c>TfIdf</c> and <c>Document</c> members.
/// </summary>
/// <param name="tfIdf">value stored in <c>TfIdf</c></param>
/// <param name="document">value stored in <c>Document</c></param>
public TfIdfWithDocument(TfIdf tfIdf, Document document)
{
    this.TfIdf = tfIdf;
    this.Document = document;
}