Пример #1
0
 /// <summary>
 /// Returns the square root of the sum of the squared weights of every term
 /// in <paramref name="termsCollection"/>, i.e. the Euclidean length of the
 /// weight vector described by <paramref name="termWeightRepresentation"/>.
 /// </summary>
 /// <param name="termWeightRepresentation">Source of the per-term weight, queried once per term.</param>
 /// <param name="termsCollection">Collection whose <c>Terms</c> are iterated.</param>
 /// <returns>The computed length; 0 when <c>Terms</c> yields nothing.</returns>
 public double CalculateDistance(ITermWeightRepresentation termWeightRepresentation,
                                 TermsCollection termsCollection)
 {
     double squaredTotal = 0;

     foreach (string currentTerm in termsCollection.Terms)
     {
         // Look the weight up first, then accumulate its square.
         double weight = termWeightRepresentation.TermWeight(currentTerm);
         squaredTotal = squaredTotal + Math.Pow(weight, 2);
     }

     return Math.Sqrt(squaredTotal);
 }
Пример #2
0
        /// <summary>
        /// Builds the indexer state for the given documents: the IDF model, one
        /// TF-IDF entry per document, and a terms collection fed with every document.
        /// </summary>
        /// <param name="documents">
        /// Documents to index. The sequence is enumerated exactly once; the
        /// constructor works on a snapshot of it afterwards.
        /// </param>
        public TfIdfIndexer(IEnumerable<Document> documents)
        {
            // Snapshot the input: it is needed three times below, and a lazy or
            // one-shot sequence must not be walked more than once.
            List<Document> documentList = documents.ToList();

            _idf = new Idf(documentList.Select(x => new BagOfWords(x)));

            // Materialize the entries. Keeping the bare Select here would store a
            // deferred query that rebuilds every BagOfWords/Tf/TfIdf object each
            // time the field is enumerated.
            _tfIdfWithDocuments = documentList
                .Select(x => new TfIdfWithDocument(new TfIdf(new Tf(new BagOfWords(x)), _idf), x))
                .ToList();

            _termsCollection = new TermsCollection();
            foreach (Document document in documentList)
            {
                _termsCollection.AddDocument(document);
            }
        }
Пример #3
0
        /// <summary>
        /// Initializes the terms collection, the document purer and the TF-IDF
        /// indexer. Both providers receive a stemmer from <c>ProvideStemmer()</c>,
        /// and both input files are resolved relative to the application domain's
        /// base directory.
        /// </summary>
        private void InitializeSearcherWithStemmer()
        {
            // Keywords file -> terms collection.
            var termsProvider = new TermsProvider(ProvideStemmer());
            string keywordsPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, KeywordsFilePath);
            _termsCollection = termsProvider.LoadFile(keywordsPath);

            // The purer is built from the freshly loaded terms.
            _documentPurer = new DocumentPurer(_termsCollection);

            // Documents file -> documents, read through the purer.
            var documentsProvider = new DocumentsProvider(ProvideStemmer(), _documentPurer);
            string documentsPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, DocumentsFilePath);
            IEnumerable<ExtendedDocument> documents = documentsProvider.Read(documentsPath);

            _tfIdfIndexer = new TfIdfIndexer(documents, _termsCollection);
        }
Пример #4
0
 /// <summary>
 /// Creates a purer that keeps a reference to the supplied terms collection.
 /// </summary>
 /// <param name="termsCollection">Terms collection stored as-is; no copy or validation is performed here.</param>
 public DocumentPurer(TermsCollection termsCollection)
 {
     this._termsCollection = termsCollection;
 }
Пример #5
0
 /// <summary>
 /// Creates an indexer for the given documents that uses a caller-supplied
 /// terms collection.
 /// </summary>
 /// <param name="documents">Documents forwarded to the single-argument constructor.</param>
 /// <param name="termsCollection">Terms collection stored in the indexer as-is.</param>
 public TfIdfIndexer(IEnumerable<Document> documents, TermsCollection termsCollection)
     : this(documents)
 {
     // Runs after the chained constructor has finished, so this assignment
     // determines the final value of the field.
     this._termsCollection = termsCollection;
 }