Exemplo n.º 1
0
        /// <summary>
        /// Writes the given term info to a file in the store directory, creating the
        /// file or overwriting an existing one. The file name is the term followed by
        /// <c>TERM_INFO_SUFFIX</c>.
        /// </summary>
        /// <param name="info">The term info to serialize; its <c>Term</c> determines the file name.</param>
        public void SaveTermInfo(TermInfo info)
        {
            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data.
            // Changing the format here would also require changing whatever reads
            // these files back, so it is only flagged, not replaced.
            var serializer = new BinaryFormatter();
            var termInfoPath = Path.Combine(_dir.FullName, info.Term + TERM_INFO_SUFFIX);

            // FileMode.Create truncates any previous file for this term.
            using (var output = new FileStream(termInfoPath, FileMode.Create))
            {
                serializer.Serialize(output, info);
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Stores the given term info in the in-memory map, keyed by its term.
 /// An existing entry for the same term is replaced.
 /// </summary>
 /// <param name="info">The term info to store.</param>
 public void SaveTermInfo(TermInfo info)
 {
     var termKey = info.Term;

     // Indexer assignment inserts or overwrites.
     _termInfos[termKey] = info;
 }
Exemplo n.º 3
0
Arquivo: Engine.cs Projeto: d4nt/Ansl
        /// <summary>
        /// Adds a document to the index, so it can appear in search results.
        /// </summary>
        /// <remarks>
        /// Each word of the document is stripped of the characters matched by
        /// <c>_nonAlphaNumChars</c> and, unless the options are case sensitive,
        /// lower-cased. Words that end up empty or whitespace are skipped.
        /// Indexing the same document again recomputes its term frequencies from
        /// scratch and detaches it from terms it no longer contains.
        /// </remarks>
        /// <param name="document">The document to index; enumerating it yields its words.</param>
        public void Index(Document document)
        {
            var documentId = document.UniquieId;

            // The list keeps terms in first-seen order for saving; the dictionary
            // gives constant-time "seen this term already?" checks.
            var documentTerms = new List<TermInfo>();
            var documentTermsByTerm = new Dictionary<string, TermInfo>();

            foreach (var documentWord in document)
            {
                var term = _nonAlphaNumChars.Replace(documentWord, "");
                if (String.IsNullOrWhiteSpace(term))
                    continue;

                // NOTE(review): ToLower() is culture-sensitive; kept as-is so terms
                // stay consistent with whatever normalization the query side uses.
                if (_options.CaseSensitve == false)
                    term = term.ToLower();

                TermInfo termInfo;

                // first time we've seen this word in this document?
                if (documentTermsByTerm.TryGetValue(term, out termInfo) == false)
                {
                    if (_store.ContainsTerm(term))
                    {
                        termInfo = _store.LoadTermInfo(term);

                        // A stored term info may still carry this document's count
                        // from a previous indexing run; drop it so the frequency is
                        // recounted rather than accumulated.
                        termInfo.TermFrequencyByDocumentId.Remove(documentId);
                    }
                    else
                    {
                        termInfo = new TermInfo() { Term = term };
                    }

                    // Avoid registering the document twice when it is re-indexed.
                    if (termInfo.DocumentsContaining.Contains(documentId) == false)
                        termInfo.DocumentsContaining.Add(documentId);

                    documentTerms.Add(termInfo);
                    documentTermsByTerm.Add(term, termInfo);
                }

                if (termInfo.TermFrequencyByDocumentId.ContainsKey(documentId))
                    termInfo.TermFrequencyByDocumentId[documentId] += 1;
                else
                    termInfo.TermFrequencyByDocumentId[documentId] = 1;
            }

            // If this document has been indexed before, cleanup any terms that are no longer present
            if (_store.ContainsDocumentInfo(documentId))
            {
                foreach (var previousTerm in _store.LoadDocumentInfo(documentId))
                {
                    // Still referenced by this document: it is re-saved below.
                    if (documentTermsByTerm.ContainsKey(previousTerm))
                        continue;

                    // Remove every trace of this document from the out-of-date
                    // term info (membership and frequency) and re-save it.
                    var staleTermInfo = _store.LoadTermInfo(previousTerm);

                    staleTermInfo.DocumentsContaining.Remove(documentId);
                    staleTermInfo.TermFrequencyByDocumentId.Remove(documentId);

                    _store.SaveTermInfo(staleTermInfo);
                }
            }

            // Tell the store about this document
            _store.SaveDocumentInfo(documentId, documentTerms.Select(t => t.Term).ToList());

            // Tell the store about all the term infos we've found in this document
            foreach (var termInfo in documentTerms)
                _store.SaveTermInfo(termInfo);
        }