Example #1
0
 /// <summary>
 /// Creates an iterator bound to <paramref name="enclosingInstance"/> and opens a term
 /// enumeration on the dictionary's reader, starting at the term made of the dictionary's
 /// field and an empty text.
 /// </summary>
 /// <param name="enclosingInstance">The dictionary whose reader and field are enumerated.</param>
 public LuceneIterator(LuceneDictionary enclosingInstance)
 {
     InitBlock(enclosingInstance);
     try
     {
         termEnum = Enclosing_Instance.reader.Terms(new Term(Enclosing_Instance.field, ""));
     }
     catch (System.IO.IOException ex)
     {
         // Best-effort: the failure is reported, not rethrown, so termEnum is left unassigned here.
         // Write the whole exception (type, message and stack trace); the stack trace alone
         // does not say what actually went wrong.
         System.Console.Error.WriteLine(ex);
     }
 }
Example #2
0
 /// <summary>
 /// Creates an iterator bound to <paramref name="enclosingInstance"/> and opens a term
 /// enumeration on the dictionary's reader, starting at the term made of the dictionary's
 /// field and an empty text.
 /// </summary>
 /// <param name="enclosingInstance">The dictionary whose reader and field are enumerated.</param>
 public LuceneIterator(LuceneDictionary enclosingInstance)
 {
     InitBlock(enclosingInstance);
     try
     {
         termEnum = Enclosing_Instance.reader.Terms(new Term(Enclosing_Instance.field, ""));
     }
     catch (System.IO.IOException ex)
     {
         // Best-effort: the failure is reported, not rethrown, so termEnum is left unassigned here.
         // Write the whole exception (type, message and stack trace); the stack trace alone
         // does not say what actually went wrong.
         System.Console.Error.WriteLine(ex);
     }
 }
        /// <summary>
        /// A dictionary built over a field name that is not in the index must expose no words:
        /// HasNext() is false and Next() returns null.
        /// </summary>
        public void TestFieldNonExistent()
        {
            try
            {
                indexReader = IndexReader.Open(store);
                ld = new LuceneDictionary(indexReader, "nonexistent_field");
                it = ld.GetWordsIterator();

                bool hasWord = it.HasNext();
                AssertFalse("More elements than expected", hasWord);

                bool nextIsNull = it.Next() == null;
                AssertTrue("Nonexistent element is really null", nextIsNull);
            }
            finally
            {
                // Always release the reader, even when an assertion above fails.
                if (indexReader != null)
                {
                    indexReader.Close();
                }
            }
        }
Example #4
0
 /// <summary>
 /// Records the dictionary this instance belongs to.
 /// </summary>
 /// <param name="enclosingInstance">The owning dictionary to store in the instance field.</param>
 private void InitBlock(LuceneDictionary enclosingInstance)
 {
     // The parameter shadows the field of the same name, hence the explicit "this.".
     this.enclosingInstance = enclosingInstance;
 }
        /// <summary>
        /// The dictionary over field "aaa" must yield exactly one word, "foo", and then report
        /// exhaustion (HasNext() false, Next() null).
        /// </summary>
        public void TestFieldAaa()
        {
            try
            {
                indexReader = IndexReader.Open(store);
                ld = new LuceneDictionary(indexReader, "aaa");
                it = ld.GetWordsIterator();

                bool firstAvailable = it.HasNext();
                AssertTrue("First element doesn't exist.", firstAvailable);

                bool firstIsFoo = it.Next().Equals("foo");
                AssertTrue("First element isn't correct", firstIsFoo);

                bool moreAvailable = it.HasNext();
                AssertFalse("More elements than expected", moreAvailable);

                bool nextIsNull = it.Next() == null;
                AssertTrue("Nonexistent element is really null", nextIsNull);
            }
            finally
            {
                // Always release the reader, even when an assertion above fails.
                if (indexReader != null)
                {
                    indexReader.Close();
                }
            }
        }
        /// <summary>
        /// Iterates the "contents" dictionary using Next() only and verifies that the words are
        /// "Jerry" then "Tom", that Next() keeps returning null once exhausted, and that repeated
        /// HasNext() calls after exhaustion keep returning false.
        /// </summary>
        public void TestFieldContents_2()
        {
            try
            {
                indexReader = IndexReader.Open(store);

                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetWordsIterator();

                // hasNext() should have no side effects //{{DIGY}} But has. Need a fix?
                // The three checks below stay disabled because of the note above.
                //AssertTrue("First element isn't where it should be.", it.HasNext());
                //AssertTrue("First element isn't where it should be.", it.HasNext());
                //AssertTrue("First element isn't where it should be.", it.HasNext());

                // just iterate through words
                AssertTrue("First element isn't correct", it.Next().Equals("Jerry"));
                AssertTrue("Second element isn't correct", it.Next().Equals("Tom"));
                AssertTrue("Nonexistent element is really null", it.Next() == null);

                // hasNext() should still have no side effects ...
                // (message fixed: the assertion expects NO further elements)
                AssertFalse("There should not be any more elements", it.HasNext());
                AssertFalse("There should not be any more elements", it.HasNext());
                AssertFalse("There should not be any more elements", it.HasNext());

                // .. and there are really no more words
                AssertTrue("Nonexistent element is really null", it.Next() == null);
                AssertTrue("Nonexistent element is really null", it.Next() == null);
                AssertTrue("Nonexistent element is really null", it.Next() == null);
            }
            finally
            {
                // Always release the reader, even when an assertion above fails.
                if (indexReader != null)
                {
                    indexReader.Close();
                }
            }
        }
        /// <summary>
        /// Walks the "contents" dictionary with alternating HasNext()/Next() calls, expecting
        /// "Jerry" then "Tom" and then exhaustion; afterwards re-creates the dictionary and
        /// checks that a plain HasNext()/Next() loop visits exactly two words.
        /// </summary>
        public void TestFieldContents_1()
        {
            try
            {
                indexReader = IndexReader.Open(store);

                // First pass: check every element explicitly.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetWordsIterator();

                bool firstAvailable = it.HasNext();
                AssertTrue("First element doesn't exist.", firstAvailable);
                bool firstIsJerry = it.Next().Equals("Jerry");
                AssertTrue("First element isn't correct", firstIsJerry);

                bool secondAvailable = it.HasNext();
                AssertTrue("Second element doesn't exist.", secondAvailable);
                bool secondIsTom = it.Next().Equals("Tom");
                AssertTrue("Second element isn't correct", secondIsTom);

                bool moreAvailable = it.HasNext();
                AssertFalse("More elements than expected", moreAvailable);
                bool nextIsNull = it.Next() == null;
                AssertTrue("Nonexistent element is really null", nextIsNull);

                // Second pass: a fresh iterator, counting down from the expected word count.
                ld = new LuceneDictionary(indexReader, "contents");
                it = ld.GetWordsIterator();

                int remaining = 2;
                for (; it.HasNext(); remaining--)
                {
                    it.Next();
                }

                AssertTrue("Number of words incorrect", remaining == 0);
            }
            finally
            {
                // Always release the reader, even when an assertion above fails.
                if (indexReader != null)
                {
                    indexReader.Close();
                }
            }
        }
Example #8
0
 /// <summary>
 /// Records the dictionary this instance belongs to.
 /// </summary>
 /// <param name="enclosingInstance">The owning dictionary to store in the instance field.</param>
 private void InitBlock(LuceneDictionary enclosingInstance)
 {
     // The parameter shadows the field of the same name, hence the explicit "this.".
     this.enclosingInstance = enclosingInstance;
 }
        /// <summary>
        /// Opens the index in <paramref name="sourceDirectory"/> and, for every field that passes the
        /// field-name filter, writes one document per accepted word (together with the number of
        /// documents the word appears in) to the index in <paramref name="TargetDirectory"/>.
        /// </summary>
        /// <param name="sourceDirectory">Directory containing the index to count words in.</param>
        /// <param name="TargetDirectory">Directory the word-frequency index is written to.</param>
        /// <param name="verbose">When true, the frequency of every indexed word is logged.</param>
        public void BuildAutoCompleteIndex(Directory sourceDirectory, Directory TargetDirectory, bool verbose)
        {
            // build a dictionary (from the spell package)
            using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
            {
                string[] fieldNames = sourceReader.GetFieldNames(IndexReader.FieldOption.ALL).ToArray();
                foreach (string fieldToAutocomplete in fieldNames)
                {
                    // NOTE(review): because the negated clauses are OR-ed, this is false only for a
                    // field whose name contains '_', "date" and "threshold" but neither "_name" nor
                    // "__display name" - i.e. almost every field is selected. Logic kept as-is;
                    // confirm whether "&&" was intended for the exclusions.
                    bool fieldSelected = fieldToAutocomplete.Contains("__display name") || fieldToAutocomplete.Contains("_name") || !fieldToAutocomplete.Contains('_') || !fieldToAutocomplete.Contains("date") || !fieldToAutocomplete.Contains("threshold");
                    if (!fieldSelected)
                    {
                        continue;
                    }

                    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

                    // code from
                    // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
                    // Dictionary)
                    //IndexWriter.Unlock(m_directory);

                    // use a custom analyzer so we can do EdgeNGramFiltering
                    AutoCompleteAnalyzer analyzer = new AutoCompleteAnalyzer();

                    // NOTE(review): a writer is opened per field with IsFirstTime as its third
                    // argument; if that is the "create" flag and it stays true across iterations,
                    // each field would start from a fresh index - confirm against IsFirstTime.
                    using (var writer = new IndexWriter(TargetDirectory, analyzer, IsFirstTime, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        writer.SetMergeFactor(300);
                        writer.SetMaxBufferedDocs(150);

                        // go through every word, storing the original word (incl. n-grams)
                        // and the number of times it occurs
                        // (the words are enumerated through the dictionary itself; the previously
                        // created but never used GetWordsIterator() instance has been dropped)
                        double num;
                        Guid guid;
                        foreach (string word in dict)
                        {
                            // too short or too long: skip the word
                            if (word.Length < UtilitySettings.AllowedMinimumWordLengthToBeIndexed
                                || word.Length > UtilitySettings.AllowedMaxWordLengthToBeIndexed)
                            {
                                continue;
                            }

                            // Skip words containing markup/path/e-mail characters, words rejected by
                            // the isNotFile check, and words that parse as a number or as a GUID.
                            if (!word.Contains('<') && !word.Contains('>') && !word.Contains('/') && !word.Contains('\\') && !isNotFile(word) && !word.Contains('@') && !word.Contains('&') && !double.TryParse(word, out num) && !Guid.TryParse(word, out guid))
                            {
                                // ok index the word
                                // use the number of documents this word appears in
                                int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                                if (verbose)
                                {
                                    log.Info(string.Format("Frequency {0} of this word {1}", freq, word));
                                }
                                var doc = MakeDocument(fieldToAutocomplete, word, freq);
                                writer.AddDocument(doc);
                            }
                        }
                        writer.Optimize();
                    }
                }
            }

            // re-open our reader
            //ReplaceSearcher();
        }
Example #10
0
        /// <summary>
        /// Reads the words of one field from the index in <paramref name="sourceDirectory"/> and
        /// writes a document for each word of at least three characters, together with the number
        /// of documents it appears in, to the index in <c>m_directory</c>.
        /// </summary>
        /// <param name="sourceDirectory">Directory containing the index to count words in.</param>
        /// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
        public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
        {
            using (IndexReader reader = IndexReader.Open(sourceDirectory, true))
            {
                // The spell package's dictionary enumerates the words of the chosen field.
                LuceneDictionary fieldWords = new LuceneDictionary(reader, fieldToAutocomplete);

                // Derived from org.apache.lucene.search.spell.SpellChecker.indexDictionary(Dictionary).
                //IndexWriter.Unlock(m_directory);

                // A custom analyzer is used so that EdgeNGramFiltering can be applied.
                AutoCompleteAnalyzer ngramAnalyzer = new AutoCompleteAnalyzer();
                using (IndexWriter indexWriter = new IndexWriter(m_directory, ngramAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    indexWriter.MergeFactor = 300;
                    indexWriter.SetMaxBufferedDocs(150);

                    // Store every sufficiently long word (incl. n-grams) with its document count.
                    foreach (string candidate in fieldWords)
                    {
                        // Words shorter than three characters are skipped; there is no upper bound.
                        if (candidate.Length >= 3)
                        {
                            Term term = new Term(fieldToAutocomplete, candidate);
                            int documentCount = reader.DocFreq(term);
                            indexWriter.AddDocument(MakeDocument(fieldToAutocomplete, candidate, documentCount));
                        }
                    }

                    indexWriter.Optimize();
                }
            }

            // re-open our reader
            ReplaceSearcher();
        }