Пример #1
0
        /// <summary>
        /// Parses <paramref name="qString"/> with a <see cref="ComplexPhraseQueryParser"/>, runs the
        /// resulting query for the top 10 hits, and asserts that the "id" values of those hits are
        /// the ids listed in <paramref name="expectedVals"/>: no unexpected hit, and none missing.
        /// </summary>
        /// <param name="qString">The query text to parse.</param>
        /// <param name="expectedVals">Comma-separated ids of the documents expected to match; empty entries are ignored.</param>
        private void CheckMatches(string qString, string expectedVals)
        {
            ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);

            qp.InOrder           = inOrder;
            qp.FuzzyPrefixLength = 1; // usually a good idea

            Query q = qp.Parse(qString);

            // Collect the expected ids, skipping empty entries.
            ISet <string> expecteds = new JCG.HashSet <string>();
            string[] vals = expectedVals.Split(',').TrimEnd();
            foreach (string val in vals)
            {
                if (val.Length > 0)
                {
                    expecteds.Add(val);
                }
            }

            TopDocs td = searcher.Search(q, 10);

            foreach (ScoreDoc hit in td.ScoreDocs)
            {
                Document doc = searcher.Doc(hit.Doc);
                string   id  = doc.Get("id");
                // Every hit must be expected; the leading space keeps the query text and the message apart.
                assertTrue(qString + " matched doc#" + id + " not expected", expecteds.Contains(id));
                // Tick the id off so that leftovers can be reported as missing matches below.
                expecteds.Remove(id);
            }

            assertEquals(qString + " missing some matches ", 0, expecteds.Count);
        }
Пример #2
0
 /// <summary>
 /// Collects into <paramref name="strings"/> the strings that can be produced starting from
 /// state <paramref name="s"/>, each one prefixed with the current contents of <paramref name="path"/>.
 /// </summary>
 /// <param name="s">State to expand.</param>
 /// <param name="pathstates">States on the current recursion path, used to detect cycles.
 /// <paramref name="s"/> is added on entry and removed again only when this call returns <c>true</c>.</param>
 /// <param name="strings">Receives a deep copy of <paramref name="path"/> each time a transition
 /// leads to an accepting state.</param>
 /// <param name="path">Scratch buffer holding the labels consumed so far; it is grown and shrunk
 /// in place while the transitions are walked.</param>
 /// <param name="limit">Maximum number of strings to collect; a value &lt;0 means "infinite".</param>
 /// <returns>
 /// <c>false</c> if a transition leads back to a state on the current path, or if more than
 /// <paramref name="limit"/> strings have been collected; otherwise <c>true</c>.
 /// </returns>
 private static bool GetFiniteStrings(State s, JCG.HashSet <State> pathstates, JCG.HashSet <Int32sRef> strings, Int32sRef path, int limit)
 {
     pathstates.Add(s);
     foreach (Transition t in s.GetTransitions())
     {
         // A transition back onto the current path is a cycle: give up.
         if (pathstates.Contains(t.to))
         {
             return(false);
         }
         // Expand every label in the transition's [min, max] interval.
         for (int n = t.min; n <= t.max; n++)
         {
             // Append label n to the path.
             path.Grow(path.Length + 1);
             path.Int32s[path.Length] = n;
             path.Length++;
             if (t.to.accept)
             {
                 // The path is mutated later, so store a copy rather than the buffer itself.
                 strings.Add(Int32sRef.DeepCopyOf(path));
                 if (limit >= 0 && strings.Count > limit)
                 {
                     return(false);
                 }
             }
             if (!GetFiniteStrings(t.to, pathstates, strings, path, limit))
             {
                 return(false);
             }
             // Backtrack: drop label n before trying the next one.
             path.Length--;
         }
     }
     pathstates.Remove(s);
     return(true);
 }
Пример #3
0
 /// <summary>
 /// Removes <paramref name="lockName"/> from the <c>locks</c> collection, if it is present,
 /// while holding the monitor on <c>locks</c>.
 /// </summary>
 /// <param name="lockName">Name of the lock to clear.</param>
 public override void ClearLock(string lockName)
 {
     lock (locks)
     {
         // Remove simply reports false when the name is absent, so a Contains
         // pre-check would only add a second lookup.
         locks.Remove(lockName);
     }
 }
Пример #4
0
 /// <summary>
 /// When <paramref name="disposing"/> is <c>true</c>, removes <c>lockName</c> from <c>locks</c>
 /// while holding the monitor on <c>locks</c>; otherwise does nothing.
 /// </summary>
 /// <param name="disposing"><c>true</c> to perform the removal.</param>
 protected override void Dispose(bool disposing)
 {
     if (!disposing)
     {
         return;
     }

     lock (locks)
     {
         locks.Remove(lockName);
     }
 }
Пример #5
0
        /// <summary>
        /// Indexes documents whose "field" value is randomly "a" or "b", deletes a random selection
        /// by id, then filters a <see cref="MatchAllDocsQuery"/> with a <see cref="QueryWrapperFilter"/>
        /// on field:a and checks that the hit count equals the number of surviving "a" documents
        /// and that every hit is one of them.
        /// </summary>
        public virtual void TestRandom()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            writer.IndexWriter.Config.SetMaxBufferedDocs(17);
            int numDocs = AtLeast(100);

            // Ids of the documents indexed with field == "a" that have not been deleted.
            ISet <string> expectedIds = new JCG.HashSet <string>();

            for (int docNum = 0; docNum < numDocs; docNum++)
            {
                bool isA = Random.Next(5) == 4;
                if (isA)
                {
                    expectedIds.Add("" + docNum);
                }

                Document doc = new Document();
                doc.Add(NewStringField("field", isA ? "a" : "b", Field.Store.NO));
                doc.Add(NewStringField("id", "" + docNum, Field.Store.YES));
                writer.AddDocument(doc);
            }

            int numDelDocs = AtLeast(10);
            for (int n = 0; n < numDelDocs; n++)
            {
                string victimId = "" + Random.Next(numDocs);
                writer.DeleteDocuments(new Term("id", victimId));
                expectedIds.Remove(victimId);
            }

            IndexReader reader = writer.GetReader();
            writer.Dispose();

            var searcher = NewSearcher(reader);
            var onlyA    = new QueryWrapperFilter(new TermQuery(new Term("field", "a")));
            TopDocs hits = searcher.Search(new MatchAllDocsQuery(), onlyA, numDocs);

            Assert.AreEqual(expectedIds.Count, hits.TotalHits);
            foreach (ScoreDoc hit in hits.ScoreDocs)
            {
                string hitId = reader.Document(hit.Doc).Get("id");
                Assert.IsTrue(expectedIds.Contains(hitId));
            }

            reader.Dispose();
            dir.Dispose();
        }
Пример #6
0
 /// <summary>
 /// Depth-first check for a loop reachable from <paramref name="s"/>. (Looking for a loop
 /// containing a state is sufficient since there are never transitions to dead states.)
 /// </summary>
 /// <param name="s">State to explore.</param>
 /// <param name="path">States on the current depth-first path.</param>
 /// <returns>
 /// <c>false</c> as soon as a transition leads back to a state on the current path;
 /// <c>true</c> if no such transition is found.
 /// </returns>
 // TODO: not great that this is recursive... in theory a
 // large automaton could exceed the stack
 private static bool IsFiniteSlow(State s, JCG.HashSet <State> path)
 {
     path.Add(s);
     foreach (Transition transition in s.GetTransitions())
     {
         State target = transition.to;

         // The target is already on the current path: that is a loop.
         if (path.Contains(target))
         {
             return false;
         }

         if (!IsFiniteSlow(target, path))
         {
             return false;
         }
     }
     path.Remove(s);
     return true;
 }
Пример #7
0
        /// <summary>
        /// Guesses the languages of a word by starting from all known languages and applying
        /// every matching rule: an accept-on-match rule keeps only the rule's languages, any
        /// other matching rule removes the rule's languages.
        /// </summary>
        /// <param name="input">The word.</param>
        /// <returns>
        /// The set of language names that are potential matches for the input word, or
        /// <see cref="Languages.ANY_LANGUAGE"/> when the rules leave no language.
        /// </returns>
        public virtual LanguageSet GuessLanguages(string input)
        {
            string lowered = input.ToLowerInvariant();

            ISet <string> candidates = new JCG.HashSet <string>(this.languages.GetLanguages());

            foreach (LangRule rule in this.rules)
            {
                if (!rule.Matches(lowered))
                {
                    continue;
                }

                if (!rule.acceptOnMatch)
                {
                    // Rejecting rule: drop every language the rule names.
                    foreach (var rejected in rule.languages)
                    {
                        candidates.Remove(rejected);
                    }
                    continue;
                }

                // Accepting rule: keep only the languages the rule names. The removals are
                // gathered first because the set cannot be modified while it is enumerated.
                IList <string> notAccepted = new JCG.List <string>();
                foreach (var candidate in candidates)
                {
                    if (!rule.languages.Contains(candidate))
                    {
                        notAccepted.Add(candidate);
                    }
                }
                foreach (var candidate in notAccepted)
                {
                    candidates.Remove(candidate);
                }
            }

            LanguageSet guessed = LanguageSet.From(candidates);
            if (guessed.Equals(Languages.NO_LANGUAGES))
            {
                return Languages.ANY_LANGUAGE;
            }
            return guessed;
        }
Пример #8
0
 /// <summary>
 /// When <paramref name="disposing"/> is <c>true</c>, removes <c>lockName</c> from <c>locks</c>
 /// between <c>UninterruptableMonitor.Enter</c> and <c>UninterruptableMonitor.Exit</c> on
 /// <c>locks</c>; otherwise does nothing.
 /// </summary>
 /// <param name="disposing"><c>true</c> to perform the removal.</param>
 protected override void Dispose(bool disposing)
 {
     if (!disposing)
     {
         return;
     }

     UninterruptableMonitor.Enter(locks);
     try
     {
         locks.Remove(lockName);
     }
     finally
     {
         // Always release the monitor, even if the removal throws.
         UninterruptableMonitor.Exit(locks);
     }
 }
Пример #9
0
 /// <summary>
 /// Removes <paramref name="lockName"/> from the <c>locks</c> collection, if it is present,
 /// between <c>UninterruptableMonitor.Enter</c> and <c>UninterruptableMonitor.Exit</c> on <c>locks</c>.
 /// </summary>
 /// <param name="lockName">Name of the lock to clear.</param>
 public override void ClearLock(string lockName)
 {
     UninterruptableMonitor.Enter(locks);
     try
     {
         // Remove simply reports false when the name is absent, so a Contains
         // pre-check would only add a second lookup.
         locks.Remove(lockName);
     }
     finally
     {
         // Always release the monitor, even if the removal throws.
         UninterruptableMonitor.Exit(locks);
     }
 }
        /// <summary>
        /// Repeatedly allocates blocks from a <see cref="RecyclingInt32BlockAllocator"/>, recycles a
        /// random slice of them, frees a random number of buffered blocks, and checks the
        /// allocator's bookkeeping (<c>BytesUsed</c>, <c>NumBufferedBlocks</c>) along the way.
        /// </summary>
        public virtual void TestAllocateAndFree()
        {
            RecyclingInt32BlockAllocator allocator = NewAllocator();
            ISet <int[]> allocated = new JCG.HashSet <int[]>();

            int[] block = allocator.GetInt32Block();
            allocated.Add(block);
            Assert.IsNotNull(block);
            int size = block.Length;

            int numIters = AtLeast(97);

            for (int i = 0; i < numIters; i++)
            {
                int num = 1 + Random.Next(39);
                for (int j = 0; j < num; j++)
                {
                    block = allocator.GetInt32Block();
                    Assert.IsNotNull(block);
                    Assert.AreEqual(size, block.Length);
                    Assert.IsTrue(allocated.Add(block), "block is returned twice");

                    // 4 bytes per int, for every block that is either handed out or buffered.
                    var expectedBytes = 4 * size * (allocated.Count + allocator.NumBufferedBlocks);
                    var actualBytes   = allocator.BytesUsed;
                    Assert.AreEqual(expectedBytes, actualBytes, "" + (expectedBytes - actualBytes));
                }

                // Recycle a random contiguous slice [begin, end) of the allocated blocks.
                int[][] array = allocated.ToArray(/*new int[0][]*/);
                int     begin = Random.Next(array.Length);
                int     end   = begin + Random.Next(array.Length - begin);
                for (int j = begin; j < end; j++)
                {
                    int[] b = array[j];
                    Assert.IsTrue(allocated.Remove(b));
                }
                allocator.RecycleInt32Blocks(array, begin, end);
                for (int j = begin; j < end; j++)
                {
                    // The recycled slots are expected to have been cleared.
                    Assert.IsNull(array[j]);
                }
                // randomly free blocks
                int numFreeBlocks = allocator.NumBufferedBlocks;
                int freeBlocks    = allocator.FreeBlocks(Random.Next(7 + allocator.MaxBufferedBlocks));
                Assert.AreEqual(allocator.NumBufferedBlocks, numFreeBlocks - freeBlocks);
            }
        }
            /// <summary>
            /// Creates a factory holding <paramref name="distinctFieldNames"/> distinct random field
            /// names (never "id") and <paramref name="disctinctTerms"/> random terms together with
            /// their <see cref="BytesRef"/> forms.
            /// </summary>
            /// <param name="baseTermVectorsFormatTestCase">The enclosing test case instance.</param>
            /// <param name="distinctFieldNames">Number of distinct field names to generate.</param>
            /// <param name="disctinctTerms">Number of terms to generate.</param>
            protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int distinctFieldNames, int disctinctTerms)
            {
                this.outerInstance = baseTermVectorsFormatTestCase;

                // A set guarantees the names are distinct; keep drawing until enough were collected.
                ISet <string> uniqueNames = new JCG.HashSet <string>();
                while (uniqueNames.Count < distinctFieldNames)
                {
                    string candidate = TestUtil.RandomSimpleString(Random);
                    uniqueNames.Add(candidate);
                    // "id" must never be one of the generated names.
                    uniqueNames.Remove("id");
                }
                this.fieldNames = uniqueNames.ToArray(/*new string[0]*/);

                terms     = new string[disctinctTerms];
                termBytes = new BytesRef[disctinctTerms];
                for (int termIndex = 0; termIndex < disctinctTerms; ++termIndex)
                {
                    string term = TestUtil.RandomRealisticUnicodeString(Random);
                    terms[termIndex]     = term;
                    termBytes[termIndex] = new BytesRef(term);
                }
            }
        /// <summary>
        /// Repeatedly allocates blocks from a <see cref="RecyclingByteBlockAllocator"/>, recycles a
        /// random slice of them, and checks that no block is handed out twice, that
        /// <c>BytesUsed</c> matches the number of live plus buffered blocks, and that recycled
        /// array slots are cleared.
        /// </summary>
        public virtual void TestAllocateAndRecycle()
        {
            RecyclingByteBlockAllocator allocator = NewAllocator();
            var allocated = new JCG.HashSet <byte[]>();

            var block = allocator.GetByteBlock();

            allocated.Add(block);
            Assert.IsNotNull(block);
            int size = block.Length;

            int numIters = AtLeast(97);

            for (int i = 0; i < numIters; i++)
            {
                int num = 1 + Random.Next(39);
                for (int j = 0; j < num; j++)
                {
                    block = allocator.GetByteBlock();
                    Assert.IsNotNull(block);
                    Assert.AreEqual(size, block.Length);
                    Assert.IsTrue(allocated.Add(block), "block is returned twice");
                    Assert.AreEqual(size * (allocated.Count + allocator.NumBufferedBlocks), allocator.BytesUsed);
                }

                // Pick a random contiguous slice [begin, end) of the allocated blocks to recycle.
                var array = allocated.ToArray();
                int begin = Random.Next(array.Length);
                int end   = begin + Random.Next(array.Length - begin);

                // The array slots are expected to be null after recycling (asserted below),
                // so remember the selected blocks beforehand.
                var selected = new List <byte[]>(end - begin);
                for (int j = begin; j < end; j++)
                {
                    selected.Add(array[j]);
                }

                allocator.RecycleByteBlocks(array, begin, end);

                for (int j = begin; j < end; j++)
                {
                    Assert.IsNull(array[j]);
                    // Indexed access instead of RemoveAt(0), which shifts the whole list on every call.
                    var b = selected[j - begin];
                    Assert.IsTrue(allocated.Remove(b));
                }
            }
        }
Пример #13
0
        /// <summary>
        /// Checks <see cref="CharArraySet"/> equality and hash codes: sets built from the same
        /// words (same order, reversed order, or a <c>JCG.HashSet</c> of the reversed list) must
        /// compare equal with equal hash codes, and must stop doing so once one word is missing
        /// from only one side.
        /// </summary>
        public virtual void TestEquality()
        {
            var words = new List <string> { "sally", "sells", "seashells", "by", "the", "sea", "shore" };
            var subject = new CharArraySet(TEST_VERSION_CURRENT, words, false);
            var copy    = new CharArraySet(TEST_VERSION_CURRENT, words, false);

            // Reverses the list in place; the sets below are built from the reversed order.
            words.Reverse();
            var reversed        = new CharArraySet(TEST_VERSION_CURRENT, words, false);
            var reversedHashSet = new JCG.HashSet <string>(words);

            // Same content: equal and same hash code, regardless of order or set type.
            assertTrue(subject.GetHashCode() == copy.GetHashCode());
            assertTrue(subject.Equals(copy));
            assertTrue(subject.GetHashCode() == reversed.GetHashCode());
            assertTrue(subject.Equals(reversed));
            assertTrue(subject.GetHashCode() == reversedHashSet.GetHashCode());
            assertTrue(subject.Equals(reversedHashSet));

            // Refill the subject without "sells".
            var fewerWords = new List <string> { "sally", "seashells", "by", "the", "sea", "shore" };
            subject.Clear();
            subject.UnionWith(fewerWords);

            // Different content: neither equal nor the same hash code.
            assertFalse(subject.GetHashCode() == copy.GetHashCode());
            assertFalse(subject.Equals(copy));
            assertFalse(subject.GetHashCode() == reversed.GetHashCode());
            assertFalse(subject.Equals(reversed));
            assertFalse(subject.GetHashCode() == reversedHashSet.GetHashCode());
            assertFalse(subject.Equals(reversedHashSet));

            // Dropping "sells" from the hash set makes the contents match again.
            reversedHashSet.Remove("sells");
            assertTrue(subject.GetHashCode() == reversedHashSet.GetHashCode());
            assertTrue(subject.Equals(reversedHashSet));
        }
Пример #14
0
        /// <summary>
        /// Adds epsilon transitions to the given automaton. This method adds extra
        /// character interval transitions that are equivalent to the given set of
        /// epsilon transitions.
        /// <para/>
        /// The automaton is first expanded from its singleton form, and afterwards it is marked
        /// non-deterministic, its numbered states are cleared and
        /// <c>CheckMinimizeAlways</c> is invoked.
        /// </summary>
        /// <param name="a"> Automaton. </param>
        /// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
        ///          source/destination states where epsilon transitions should be
        ///          added. NOTE: this collection is modified; the additional pairs found while
        ///          computing the epsilon closure are added to it. </param>
        public static void AddEpsilons(Automaton a, ICollection <StatePair> pairs)
        {
            a.ExpandSingleton();
            // forward: source state -> destination states; back: destination state -> source states.
            Dictionary <State, JCG.HashSet <State> > forward = new Dictionary <State, JCG.HashSet <State> >();
            Dictionary <State, JCG.HashSet <State> > back    = new Dictionary <State, JCG.HashSet <State> >();

            foreach (StatePair p in pairs)
            {
                if (!forward.TryGetValue(p.s1, out JCG.HashSet <State> to))
                {
                    to            = new JCG.HashSet <State>();
                    forward[p.s1] = to;
                }
                to.Add(p.s2);
                if (!back.TryGetValue(p.s2, out JCG.HashSet <State> from))
                {
                    from       = new JCG.HashSet <State>();
                    back[p.s2] = from;
                }
                from.Add(p.s1);
            }
            // calculate epsilon closure
            // worklist holds the pairs still to be processed, in order; workset mirrors its
            // contents for the membership test that avoids queueing a pair twice.
            LinkedList <StatePair> worklist = new LinkedList <StatePair>(pairs);

            JCG.HashSet <StatePair> workset = new JCG.HashSet <StatePair>(pairs);
            while (worklist.Count > 0)
            {
                // Take the pair at the head of the worklist.
                StatePair p = worklist.First.Value;
                worklist.Remove(p);
                workset.Remove(p);
#pragma warning disable IDE0018 // Inline variable declaration
                JCG.HashSet <State> from;
#pragma warning restore IDE0018 // Inline variable declaration
                if (forward.TryGetValue(p.s2, out JCG.HashSet <State> to))
                {
                    // For p = (s1, s2) and every known pair (s2, s), the pair (s1, s) is implied.
                    foreach (State s in to)
                    {
                        StatePair pp = new StatePair(p.s1, s);
                        if (!pairs.Contains(pp))
                        {
                            // New implied pair: record it in pairs and in both indexes, then queue it.
                            pairs.Add(pp);
                            forward[p.s1].Add(s);
                            back[s].Add(p.s1);
                            worklist.AddLast(pp);
                            workset.Add(pp);
                            // Pairs ending in s1 must be looked at again, since s1 gained a destination.
                            if (back.TryGetValue(p.s1, out from))
                            {
                                foreach (State q in from)
                                {
                                    StatePair qq = new StatePair(q, p.s1);
                                    if (!workset.Contains(qq))
                                    {
                                        worklist.AddLast(qq);
                                        workset.Add(qq);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // add transitions
            foreach (StatePair p in pairs)
            {
                p.s1.AddEpsilon(p.s2);
            }
            a.deterministic = false;
            //a.clearHashCode();
            a.ClearNumberedStates();
            a.CheckMinimizeAlways();
        }
Пример #15
0
        /// <summary>
        /// Builds a taxonomy with root categories "a", "b" and "c", where "a" and "b" receive a
        /// random number of children, and verifies <c>GetChildren</c>: it yields nothing for a
        /// non-existing category, a childless category and a negative ordinal; it yields exactly
        /// the three roots for the root ordinal; and it yields the expected number of
        /// two-component children under "a" and "b".
        /// </summary>
        public virtual void TestGetChildren()
        {
            Directory dir = NewDirectory();
            var       taxoWriter = new DirectoryTaxonomyWriter(dir);
            int       numCategories = AtLeast(10);
            int       numA = 0, numB = 0;
            Random    random = Random;

            // add the two categories for which we'll also add children (so asserts are simpler)
            taxoWriter.AddCategory(new FacetLabel("a"));
            taxoWriter.AddCategory(new FacetLabel("b"));
            for (int i = 0; i < numCategories; i++)
            {
                if (random.NextBoolean())
                {
                    taxoWriter.AddCategory(new FacetLabel("a", Convert.ToString(i, CultureInfo.InvariantCulture)));
                    ++numA;
                }
                else
                {
                    taxoWriter.AddCategory(new FacetLabel("b", Convert.ToString(i, CultureInfo.InvariantCulture)));
                    ++numB;
                }
            }
            // add category with no children
            taxoWriter.AddCategory(new FacetLabel("c"));
            taxoWriter.Dispose();

            var taxoReader = new DirectoryTaxonomyReader(dir);

            // non existing category
            TaxonomyReader.ChildrenEnumerator it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("invalid")));

            Assert.AreEqual(false, it.MoveNext());

            // a category with no children
            it = taxoReader.GetChildren(taxoReader.GetOrdinal(new FacetLabel("c")));
            Assert.AreEqual(false, it.MoveNext());

            // arbitrary negative ordinal
            it = taxoReader.GetChildren(-2);
            Assert.AreEqual(false, it.MoveNext());

            // root's children
            var roots = new JCG.HashSet <string> {
                "a", "b", "c"
            };

            it = taxoReader.GetChildren(TaxonomyReader.ROOT_ORDINAL);
            while (roots.Count > 0)
            {
                // Fail clearly if the enumerator ends early instead of reading Current after exhaustion.
                Assert.IsTrue(it.MoveNext(), "root children ended before all roots were seen");
                FacetLabel root = taxoReader.GetPath(it.Current);
                Assert.AreEqual(1, root.Length);
                Assert.IsTrue(roots.Remove(root.Components[0]));
            }
            Assert.AreEqual(false, it.MoveNext());

            for (int i = 0; i < 2; i++)
            {
                string     parent  = i == 0 ? "a" : "b";
                FacetLabel cp      = new FacetLabel(parent);
                int        ordinal = taxoReader.GetOrdinal(cp);
                it = taxoReader.GetChildren(ordinal);
                int numChildren = 0;
                while (it.MoveNext())
                {
                    int        child = it.Current;
                    FacetLabel path  = taxoReader.GetPath(child);
                    Assert.AreEqual(2, path.Length);
                    // expected value first, so a failure message reads correctly
                    Assert.AreEqual(parent, path.Components[0]);
                    ++numChildren;
                }
                int expected = i == 0 ? numA : numB;
                Assert.AreEqual(expected, numChildren, "invalid num children");
            }
            taxoReader.Dispose();

            dir.Dispose();
        }
Пример #16
0
        /// <summary>
        /// Builds a random index, picks terms of the "body" field through two size-5 priority
        /// queues (one fed when a term's DocFreq is higher than its top, one when it is lower),
        /// and checks that a <see cref="CommonTermsQuery"/> over all picked terms matches the same
        /// documents as a <see cref="BooleanQuery"/> over only the low-frequency terms. Finally the
        /// query is run through <c>QueryUtils.Check</c> after a force merge.
        /// </summary>
        public void TestRandomIndex()
        {
            Directory dir = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);
            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);
            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader = w.GetReader();
            AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader);
            string field = @"body";
            Terms terms = wrapper.GetTerms(field);
            // NOTE(review): the ordering of these two queues is defined by the anonymous classes,
            // which are outside this view; presumably Top is the entry to replace next - confirm.
            var lowFreqQueue = new AnonymousPriorityQueue(this, 5);
            Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
            try
            {
                TermsEnum iterator = terms.GetIterator(null);
                while (iterator.Next() != null)
                {
                    if (highFreqQueue.Count < 5)
                    {
                        // The first five terms seed both queues.
                        highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        // Overwrite the top entry in place when the current term is more extreme.
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                // The comparison below is only attempted when the two frequencies are far enough apart.
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List<TermAndFreq> highTerms = QueueToList(highFreqQueue);
                List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue);
                IndexSearcher searcher = NewSearcher(reader);
                Occur lowFreqOccur = RandomOccur(Random);
                BooleanQuery verifyQuery = new BooleanQuery();
                // NOTE(review): highFreq - 1 is the third constructor argument, presumably the
                // maximum term frequency cutoff - confirm against CommonTermsQuery.
                CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean());
                // Low-frequency terms go into both the query under test and the reference query.
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
                }

                // High-frequency terms are added to the query under test only.
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);
                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                // Both searches must return the same set of document ids.
                var hits = new JCG.HashSet<int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);
                // Force merge before handing the query to QueryUtils.Check on a fresh reader.
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Пример #17
0
        /// <summary>
        /// Builds a random index, picks terms of the "body" field through two size-5 priority
        /// queues (one fed when a term's DocFreq is higher than its top, one when it is lower),
        /// and checks that a <see cref="CommonTermsQuery"/> over all picked terms matches the same
        /// documents as a <see cref="BooleanQuery"/> over only the low-frequency terms. Finally the
        /// query is run through <c>QueryUtils.Check</c> after a force merge.
        /// </summary>
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader        = w.GetReader();
            AtomicReader    wrapper       = SlowCompositeReaderWrapper.Wrap(reader);
            string          field         = @"body";
            Terms           terms         = wrapper.GetTerms(field);
            // NOTE(review): the ordering of these two queues is defined by the anonymous classes,
            // which are outside this view; presumably Top is the entry to replace next - confirm.
            var             lowFreqQueue  = new PriorityQueueAnonymousClass(5);
            var             highFreqQueue = new PriorityQueueAnonymousClass1(5);

            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        // The first five terms seed both queues.
                        highFreqQueue.Add(new TermAndFreq(
                                              BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(
                                             BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        // Overwrite the top entry in place when the current term is more extreme.
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                // The comparison below is only attempted when the two frequencies are far enough apart.
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                IList <TermAndFreq> highTerms = QueueToList(highFreqQueue);
                IList <TermAndFreq> lowTerms  = QueueToList(lowFreqQueue);

                IndexSearcher    searcher     = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                // NOTE(review): highFreq - 1 is the third constructor argument, presumably the
                // maximum term frequency cutoff - confirm against CommonTermsQuery.
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random),
                                                                     lowFreqOccur, highFreq - 1, Random.NextBoolean());
                // Low-frequency terms go into both the query under test and the reference query.
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field,
                                                                             termAndFreq.term)), lowFreqOccur));
                }
                // High-frequency terms are added to the query under test only.
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);

                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                // Both searches must return the same set of document ids.
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);

                /*
                 *  We need to force merge here since QueryUtils adds checks based
                 *  on leaf readers, which have different statistics than the top
                 *  level reader if we have more than one segment. This could
                 *  result in a different query / results.
                 */
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Пример #18
0
        /// <summary>
        /// Unzips "trecdocs.zip" into a temp directory and reads it through a
        /// <see cref="TrecContentSource"/> configured with <see cref="TrecParserByPath"/>. Checks that
        /// every document matches the expected content for its path type, that reading ends with a
        /// <see cref="NoMoreDataException"/> after exactly five documents, and that every
        /// <see cref="ParsePathType"/> other than UNKNOWN was seen.
        /// </summary>
        public void TestTrecFeedDirAllTypes()
        {
            DirectoryInfo dataDir = CreateTempDir("trecFeedAllTypes");

            using (var zipStream = GetDataFile("trecdocs.zip"))
            {
                TestUtil.Unzip(zipStream, dataDir);
            }

            using TrecContentSource tcs = new TrecContentSource();
            Dictionary <string, string> props = new Dictionary <string, string>
            {
                ["print.props"]                     = "false",
                ["content.source.verbose"]          = "false",
                ["content.source.excludeIteration"] = "true",
                ["doc.maker.forever"]               = "false",
                ["docs.dir"]                        = dataDir.GetCanonicalPath().Replace('\\', '/'),
                ["trec.doc.parser"]                 = typeof(TrecParserByPath).AssemblyQualifiedName,
                ["content.source.forever"]          = "false",
            };

            tcs.SetConfig(new Config(props));
            tcs.ResetInputs();

            DocData docData    = new DocData();
            int     docCount   = 0;
            bool    sawNoMoreData = false;

            // LUCENENET specific - skip our UNKNOWN element.
            var knownTypes = ((ParsePathType[])Enum.GetValues(typeof(ParsePathType))).Where(t => t != ParsePathType.UNKNOWN).ToArray();
            ISet <ParsePathType> unseenTypes = new JCG.HashSet <ParsePathType>(knownTypes);

            try
            {
                // arbitrary limit to prevent looping forever in case of test failure
                while (docCount < 100)
                {
                    docData = tcs.GetNextDocData(docData);
                    ++docCount;
                    assertNotNull("doc data " + docCount + " should not be null!", docData);
                    unseenTypes.Remove(tcs.currPathType);
                    switch (tcs.currPathType)
                    {
                    case ParsePathType.GOV2:
                        assertDocData(docData, "TEST-000", "TEST-000 title", "TEST-000 text", tcs.ParseDate("Sun, 11 Jan 2009 08:00:00 GMT"));
                        break;

                    case ParsePathType.FBIS:
                        assertDocData(docData, "TEST-001", "TEST-001 Title", "TEST-001 text", tcs.ParseDate("1 January 1991"));
                        break;

                    case ParsePathType.FR94:
                        // no title extraction in this source for now
                        assertDocData(docData, "TEST-002", null, "DEPARTMENT OF SOMETHING", tcs.ParseDate("February 3, 1994"));
                        break;

                    case ParsePathType.FT:
                        assertDocData(docData, "TEST-003", "Test-003 title", "Some pub text", tcs.ParseDate("980424"));
                        break;

                    case ParsePathType.LATIMES:
                        assertDocData(docData, "TEST-004", "Test-004 Title", "Some paragraph", tcs.ParseDate("January 17, 1997, Sunday"));
                        break;

                    default:
                        assertTrue("Should never get here!", false);
                        break;
                    }
                }
            }
            catch (NoMoreDataException)
            {
                // Expected: the source signals that it ran out of documents.
                sawNoMoreData = true;
            }

            assertTrue("Should have gotten NoMoreDataException!", sawNoMoreData);
            assertEquals("Wrong number of documents created by source!", 5, docCount);
            assertTrue("Did not see all types!", unseenTypes.Count == 0);
        }