Exemplo n.º 1
0
        /// <summary>
        /// Hashes the descriptor's string form with SHA-256, looks the Base64 hash up in the
        /// index under <c>indexerDocumentDescriptorVersion</c>, and walks the term-docs
        /// enumerator of the searcher's reader, loading every document it yields.
        /// </summary>
        /// <param name="descriptor">Descriptor whose <c>ToString()</c> output is hashed.</param>
        /// <returns>
        /// <c>IndexerSetupResult.Failure</c> when the <c>setup</c> flag is already set;
        /// otherwise <c>IndexerSetupResult.Okay</c>.
        /// </returns>
        public IndexerSetupResult Setup(IXDescriptor descriptor)
        {
            // NOTE(review): the `setup` flag is only read here, never written in this method -
            // confirm it is set elsewhere, otherwise this guard can never trigger.
            if (setup)
            {
                return(IndexerSetupResult.Failure);
            }

            hashFactory = new System.Security.Cryptography.SHA256Managed();

            // Base64 of SHA-256 over the UTF-8 bytes of the descriptor's string representation.
            byte[] descriptorBytes = System.Text.Encoding.UTF8.GetBytes(descriptor.ToString());
            string _v = Convert.ToBase64String(hashFactory.ComputeHash(descriptorBytes));

            // NOTE(review): the frequency is computed but not used by the visible code.
            int df = indexSearcher.DocFreq(new Term(indexerDocumentDescriptorVersion, _v));

            // NOTE(review): the collected documents are not used after the loop in the visible code.
            List <Document> docs = new List <Document>();

            // The enumerator holds reader resources; dispose it even if loading a document throws.
            using (TermDocs term = indexSearcher.IndexReader.TermDocs())
            {
                while (term.Next())
                {
                    docs.Add(indexSearcher.Doc(term.Doc));
                }
            }

            return(IndexerSetupResult.Okay);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Tells whether the given word is present in the index, i.e. whether its
        /// document frequency for the word term is greater than zero.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <exception cref="System.IO.IOException">May be raised by the underlying index access.</exception>
        /// <returns><c>true</c> if and only if the word exists in the index.</returns>
        public virtual bool Exist(System.String word)
        {
            // ObtainSearcher is responsible for the ensure-open check.
            IndexSearcher searcher = ObtainSearcher();

            try
            {
                var wordTerm          = F_WORD_TERM.CreateTerm(word);
                int documentFrequency = searcher.DocFreq(wordTerm);

                return documentFrequency > 0;
            }
            finally
            {
                // Always hand the searcher back, even when the lookup throws.
                ReleaseSearcher(searcher);
            }
        }
        /// <summary>
        /// Tokenizes every value of <c>_field</c> in the given document and, for each token of
        /// at least three characters that has not been seen before and has no entry in the
        /// searched index, adds a document built by <c>CreateDocument</c> to <c>_indexWriter</c>.
        /// </summary>
        /// <param name="doc">Document whose <c>_field</c> fieldables are read.</param>
        /// <param name="analyzer">Analyzer that supplies the reusable token stream.</param>
        /// <param name="state">State object passed through to the index calls.</param>
        public void AddDocument(global::Lucene.Net.Documents.Document doc, Analyzer analyzer, IState state)
        {
            var fields = doc.GetFieldables(_field);
            if (fields == null)
            {
                return;
            }

            foreach (var current in fields)
            {
                if (current == null)
                {
                    continue;
                }

                var        text = current.StringValue(state);
                TextReader source;

                if (string.IsNullOrEmpty(text))
                {
                    // The fieldable is being reused for indexing, so rewind its underlying
                    // reader instead of recreating it. Empty or unsupported readers are skipped.
                    source = current.ReaderValue;
                    switch (source)
                    {
                        case ReusableStringReader reusable when reusable.Length == 0:
                            continue;

                        case ReusableStringReader reusable:
                            reusable.Reset();
                            break;

                        case StreamReader streamed when streamed.BaseStream.Length == 0:
                            continue;

                        case StreamReader streamed:
                            streamed.BaseStream.Position = 0;
                            break;

                        default:
                            continue;
                    }
                }
                else
                {
                    source = new StringReader(text);
                }

                var tokens = analyzer.ReusableTokenStream(_field, source);
                while (tokens.IncrementToken())
                {
                    var term       = tokens.GetAttribute <ITermAttribute>().Term;
                    int termLength = term.Length;

                    // Too-short words are dropped ("too long" is fine); words already handled
                    // are skipped early to avoid allocating terms and searching again.
                    if (termLength < 3 || _alreadySeen.Contains(term))
                    {
                        continue;
                    }

                    // The searcher is created lazily, on the first word that needs a lookup.
                    if (_indexSearcher == null)
                    {
                        _indexSearcher = new IndexSearcher(_directory, true, state);
                    }

                    bool missingFromIndex = _indexSearcher.DocFreq(_fWordTerm.CreateTerm(term), state) <= 0;
                    if (missingFromIndex)
                    {
                        // The word does not exist in the gram index yet.
                        int smallestGram = GetMin(termLength);

                        _indexWriter.AddDocument(CreateDocument(term, smallestGram, smallestGram + 1), state);
                    }

                    _alreadySeen.Add(term);
                }
            }
        }
        /// <summary>
        /// Builds an n-gram query for <paramref name="word"/>, runs it against <c>_searcher</c>,
        /// and returns the terms of the hits that pass the score and frequency filters.
        /// </summary>
        /// <param name="suggestionField">Field descriptor; only its <c>Name</c> is read here.</param>
        /// <param name="word">The word to find suggestions for.</param>
        /// <param name="options">Supplies accuracy, page size and sort mode.</param>
        /// <param name="sd">Scorer whose <c>GetDistance</c> result is compared against the accuracy threshold.</param>
        /// <returns>
        /// The word itself when it is already indexed and popularity sorting is off;
        /// <c>EmptyArray</c> when nothing qualifies; otherwise the terms popped from the
        /// suggestion queue, filled from the last array slot to the first.
        /// </returns>
        private string[] QueryOverSingleWord <TDistance>(SuggestionField suggestionField, string word, SuggestionOptions options, TDistance sd)
            where TDistance : IStringDistance
        {
            var min         = options.Accuracy ?? SuggestionOptions.DefaultAccuracy;
            var field       = suggestionField.Name;
            var pageSize    = options.PageSize;
            var morePopular = options.SortMode == SuggestionSortMode.Popularity;

            int lengthWord = word.Length;

            var ir = _searcher.IndexReader;

            // Frequency checks are only performed when both a reader and a field name exist.
            bool canUseIndex = ir != null && field != null;

            int freq = 0;
            if (canUseIndex)
            {
                freq = ir.DocFreq(new Term(FWord, word), _state);
            }

            int goalFreq = (morePopular && canUseIndex) ? freq : 0;

            // The word is already indexed and popularity does not matter: suggest the word itself.
            if (freq > 0 && !morePopular)
            {
                return new[] { word };
            }

            var query       = new BooleanQuery();
            var alreadySeen = new HashSet <string>();

            int smallestGram = GetMin(lengthWord);
            int largestGram  = smallestGram + 1;

            var table = GramsTable;

            for (int gramSize = smallestGram; gramSize <= largestGram; gramSize++)
            {
                string[] grams = FormGrams(word, gramSize);
                if (grams.Length == 0)
                {
                    continue;
                }

                // Optionally boost grams that match the start of the word.
                if (BoostStart > 0)
                {
                    Add(query, table[gramSize].Start, grams[0], BoostStart);
                }

                // Optionally boost grams that match the end of the word.
                if (BoostEnd > 0)
                {
                    Add(query, table[gramSize].End, grams[grams.Length - 1], BoostEnd);
                }

                foreach (string gram in grams)
                {
                    Add(query, table[gramSize].Gram, gram);
                }
            }

            // Fetch more hits than the page size in case the filters below reject some of them.
            int maxHits = 10 * pageSize;

            ScoreDoc[] hits = _searcher.Search(query, null, maxHits, _state).ScoreDocs;

            var queue = new SuggestWordQueue(pageSize);
            int limit = Math.Min(hits.Length, maxHits);

            // The candidate instance is reused until it is actually inserted into the queue.
            var candidate = new SuggestWord();

            for (int hitIndex = 0; hitIndex < limit; hitIndex++)
            {
                // Original word stored in the hit document.
                candidate.Term = _searcher.Doc(hits[hitIndex].Doc, _state).Get(FWord, _state);

                // Never suggest the word for itself.
                if (candidate.Term.Equals(word, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                candidate.Score = sd.GetDistance(word, candidate.Term);
                if (candidate.Score < min)
                {
                    continue;
                }

                if (canUseIndex)
                {
                    // Frequency of the candidate in the index.
                    candidate.Freq = _searcher.DocFreq(new Term(FWord, candidate.Term), _state);

                    // Skip candidates less frequent than the input (when sorting by popularity)
                    // and candidates that are not present at all.
                    bool lessPopularThanInput = morePopular && goalFreq > candidate.Freq;
                    if (lessPopularThanInput || candidate.Freq < 1)
                    {
                        continue;
                    }
                }

                // A term that was already accepted is not returned twice.
                if (!alreadySeen.Add(candidate.Term))
                {
                    continue;
                }

                queue.InsertWithOverflow(candidate);
                if (queue.Size() == pageSize)
                {
                    // Queue is full: raise the threshold to the score at the top of the queue.
                    min = queue.Top().Score;
                }

                candidate = new SuggestWord();
            }

            int size = queue.Size();
            if (size == 0)
            {
                return EmptyArray;
            }

            // Pop the queue into the result array, filling it from the last slot to the first.
            string[] list = new string[size];
            int      slot = size;
            while (slot > 0)
            {
                slot--;
                list[slot] = queue.Pop().Term;
            }

            return list;
        }