Esempio n. 1
0
        internal virtual void AssertQuery(Query query, Filter filter, Sort sort)
        {
            int     maxDoc = searcher.IndexReader.MaxDoc;
            TopDocs all;
            int     pageSize = TestUtil.NextInt32(Random, 1, maxDoc * 2);

            if (isVerbose)
            {
                Console.WriteLine("\nassertQuery " + (iter++) + ": query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize);
            }
            bool doMaxScore = Random.NextBoolean();
            bool doScores   = Random.NextBoolean();

            if (sort == null)
            {
                all = searcher.Search(query, filter, maxDoc);
            }
            else if (sort == Sort.RELEVANCE)
            {
                all = searcher.Search(query, filter, maxDoc, sort, true, doMaxScore);
            }
            else
            {
                all = searcher.Search(query, filter, maxDoc, sort, doScores, doMaxScore);
            }
            if (isVerbose)
            {
                Console.WriteLine("  all.TotalHits=" + all.TotalHits);
                int upto = 0;
                foreach (ScoreDoc scoreDoc in all.ScoreDocs)
                {
                    Console.WriteLine("    hit " + (upto++) + ": id=" + searcher.Doc(scoreDoc.Doc).Get("id") + " " + scoreDoc);
                }
            }
            int      pageStart  = 0;
            ScoreDoc lastBottom = null;

            while (pageStart < all.TotalHits)
            {
                TopDocs paged;
                if (sort == null)
                {
                    if (isVerbose)
                    {
                        Console.WriteLine("  iter lastBottom=" + lastBottom);
                    }
                    paged = searcher.SearchAfter(lastBottom, query, filter, pageSize);
                }
                else
                {
                    if (isVerbose)
                    {
                        Console.WriteLine("  iter lastBottom=" + lastBottom);
                    }
                    if (sort == Sort.RELEVANCE)
                    {
                        paged = searcher.SearchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore);
                    }
                    else
                    {
                        paged = searcher.SearchAfter(lastBottom, query, filter, pageSize, sort, doScores, doMaxScore);
                    }
                }
                if (isVerbose)
                {
                    Console.WriteLine("    " + paged.ScoreDocs.Length + " hits on page");
                }

                if (paged.ScoreDocs.Length == 0)
                {
                    break;
                }
                AssertPage(pageStart, all, paged);
                pageStart += paged.ScoreDocs.Length;
                lastBottom = paged.ScoreDocs[paged.ScoreDocs.Length - 1];
            }
            Assert.AreEqual(all.ScoreDocs.Length, pageStart);
        }
Esempio n. 2
0
        public override void SetUp()
        {
            base.SetUp();

            // LUCENENET specific: Moved this logic here to ensure that it is executed
            // after the class is setup - a field is way to early to execute this.
            bool supportsDocValues = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal) == false;

            allSortFields = new List <SortField> {
#pragma warning disable 612,618
                new SortField("byte", SortFieldType.BYTE, false),
                new SortField("short", SortFieldType.INT16, false),
#pragma warning restore 612,618
                new SortField("int", SortFieldType.INT32, false),
                new SortField("long", SortFieldType.INT64, false),
                new SortField("float", SortFieldType.SINGLE, false),
                new SortField("double", SortFieldType.DOUBLE, false),
                new SortField("bytes", SortFieldType.STRING, false),
                new SortField("bytesval", SortFieldType.STRING_VAL, false),
#pragma warning disable 612,618
                new SortField("byte", SortFieldType.BYTE, true),
                new SortField("short", SortFieldType.INT16, true),
#pragma warning restore 612,618
                new SortField("int", SortFieldType.INT32, true),
                new SortField("long", SortFieldType.INT64, true),
                new SortField("float", SortFieldType.SINGLE, true),
                new SortField("double", SortFieldType.DOUBLE, true),
                new SortField("bytes", SortFieldType.STRING, true),
                new SortField("bytesval", SortFieldType.STRING_VAL, true),
                SortField.FIELD_SCORE,
                SortField.FIELD_DOC
            };

            if (supportsDocValues)
            {
                allSortFields.AddRange(new SortField[] {
                    new SortField("intdocvalues", SortFieldType.INT32, false),
                    new SortField("floatdocvalues", SortFieldType.SINGLE, false),
                    new SortField("sortedbytesdocvalues", SortFieldType.STRING, false),
                    new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, false),
                    new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, false),
                    new SortField("intdocvalues", SortFieldType.INT32, true),
                    new SortField("floatdocvalues", SortFieldType.SINGLE, true),
                    new SortField("sortedbytesdocvalues", SortFieldType.STRING, true),
                    new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, true),
                    new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, true)
                });
            }

            // Also test missing first / last for the "string" sorts:
            foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" })
            {
                for (int rev = 0; rev < 2; rev++)
                {
                    bool      reversed = rev == 0;
                    SortField sf       = new SortField(field, SortFieldType.STRING, reversed);
                    sf.MissingValue = SortField.STRING_FIRST;
                    allSortFields.Add(sf);

                    sf = new SortField(field, SortFieldType.STRING, reversed);
                    sf.MissingValue = SortField.STRING_LAST;
                    allSortFields.Add(sf);
                }
            }

            int limit = allSortFields.Count;
            for (int i = 0; i < limit; i++)
            {
                SortField sf = allSortFields[i];
                if (sf.Type == SortFieldType.INT32)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.INT32, sf.IsReverse);
                    sf2.MissingValue = Random.Next();
                    allSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.INT64)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.INT64, sf.IsReverse);
                    sf2.MissingValue = Random.NextInt64();
                    allSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.SINGLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.SINGLE, sf.IsReverse);
                    sf2.MissingValue = (float)Random.NextDouble();
                    allSortFields.Add(sf2);
                }
                else if (sf.Type == SortFieldType.DOUBLE)
                {
                    SortField sf2 = new SortField(sf.Field, SortFieldType.DOUBLE, sf.IsReverse);
                    sf2.MissingValue = Random.NextDouble();
                    allSortFields.Add(sf2);
                }
            }

            dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            int numDocs = AtLeast(200);
            for (int i = 0; i < numDocs; i++)
            {
                IList <Field> fields = new List <Field>();
                fields.Add(NewTextField("english", English.Int32ToEnglish(i), Field.Store.NO));
                fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO));
                fields.Add(NewStringField("byte", "" + ((sbyte)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                fields.Add(NewStringField("short", "" + ((short)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO));
                fields.Add(new Int32Field("int", Random.Next(), Field.Store.NO));
                fields.Add(new Int64Field("long", Random.NextInt64(), Field.Store.NO));

                fields.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO));
                fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));
                fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO));
                fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO));

                if (supportsDocValues)
                {
                    fields.Add(new NumericDocValuesField("intdocvalues", Random.Next()));
                    fields.Add(new SingleDocValuesField("floatdocvalues", (float)Random.NextDouble()));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                    fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                    fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random))));
                }
                Document document = new Document();
                document.Add(new StoredField("id", "" + i));
                if (isVerbose)
                {
                    Console.WriteLine("  add doc id=" + i);
                }
                foreach (Field field in fields)
                {
                    // So we are sometimes missing that field:
                    if (Random.Next(5) != 4)
                    {
                        document.Add(field);
                        if (isVerbose)
                        {
                            Console.WriteLine("    " + field);
                        }
                    }
                }

                iw.AddDocument(document);

                if (Random.Next(50) == 17)
                {
                    iw.Commit();
                }
            }
            reader = iw.GetReader();
            iw.Dispose();
            searcher = NewSearcher(reader);
            if (isVerbose)
            {
                Console.WriteLine("  searcher=" + searcher);
            }
        }
Esempio n. 3
0
        public virtual void TestRollingUpdates_Mem()
        {
            Random random             = new J2N.Randomizer(Random.NextInt64());
            BaseDirectoryWrapper dir  = NewDirectory();
            LineFileDocs         docs = new LineFileDocs(random, DefaultCodecSupportsDocValues);

            //provider.register(new MemoryCodec());
            if ((!"Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) && LuceneTestCase.Random.NextBoolean())
            {
                Codec.Default =
                    TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(LuceneTestCase.Random.nextBoolean(), random.NextSingle()));
            }

            MockAnalyzer analyzer = new MockAnalyzer(LuceneTestCase.Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(LuceneTestCase.Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriter   w          = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            int           SIZE       = AtLeast(20);
            int           id         = 0;
            IndexReader   r          = null;
            IndexSearcher s          = null;
            int           numUpdates = (int)(SIZE * (2 + (TestNightly ? 200 * LuceneTestCase.Random.NextDouble() : 5 * LuceneTestCase.Random.NextDouble())));

            if (Verbose)
            {
                Console.WriteLine("TEST: numUpdates=" + numUpdates);
            }
            int updateCount = 0;

            // TODO: sometimes update ids not in order...
            for (int docIter = 0; docIter < numUpdates; docIter++)
            {
                Documents.Document doc  = docs.NextDoc();
                string             myID = "" + id;
                if (id == SIZE - 1)
                {
                    id = 0;
                }
                else
                {
                    id++;
                }
                if (Verbose)
                {
                    Console.WriteLine("  docIter=" + docIter + " id=" + id);
                }
                ((Field)doc.GetField("docid")).SetStringValue(myID);

                Term idTerm = new Term("docid", myID);

                bool doUpdate;
                if (s != null && updateCount < SIZE)
                {
                    TopDocs hits = s.Search(new TermQuery(idTerm), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
                    if (Verbose)
                    {
                        if (doUpdate)
                        {
                            Console.WriteLine("  tryDeleteDocument failed");
                        }
                        else
                        {
                            Console.WriteLine("  tryDeleteDocument succeeded");
                        }
                    }
                }
                else
                {
                    doUpdate = true;
                    if (Verbose)
                    {
                        Console.WriteLine("  no searcher: doUpdate=true");
                    }
                }

                updateCount++;

                if (doUpdate)
                {
                    w.UpdateDocument(idTerm, doc);
                }
                else
                {
                    w.AddDocument(doc);
                }

                if (docIter >= SIZE && LuceneTestCase.Random.Next(50) == 17)
                {
                    if (r != null)
                    {
                        r.Dispose();
                    }

                    bool applyDeletions = LuceneTestCase.Random.NextBoolean();

                    if (Verbose)
                    {
                        Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
                    }

                    r = w.GetReader(applyDeletions);
                    if (applyDeletions)
                    {
                        s = NewSearcher(r);
                    }
                    else
                    {
                        s = null;
                    }
                    Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
                    updateCount = 0;
                }
            }

            if (r != null)
            {
                r.Dispose();
            }

            w.Commit();
            Assert.AreEqual(SIZE, w.NumDocs);

            w.Dispose();

            TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

            docs.Dispose();

            // LUCENE-4455:
            SegmentInfos infos = new SegmentInfos();

            infos.Read(dir);
            long totalBytes = 0;

            foreach (SegmentCommitInfo sipc in infos.Segments)
            {
                totalBytes += sipc.GetSizeInBytes();
            }
            long totalBytes2 = 0;

            foreach (string fileName in dir.ListAll())
            {
                if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
                {
                    totalBytes2 += dir.FileLength(fileName);
                }
            }
            Assert.AreEqual(totalBytes2, totalBytes);
            dir.Dispose();
        }
Esempio n. 4
0
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush      failure   = new FailOnlyOnFlush(this);

            directory.FailOn(failure);

            IndexWriter writer  = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMaxBufferedDocs(2));
            Document    doc     = new Document();
            Field       idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.SetStringValue(Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.hitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs);
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);

            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Esempio n. 5
0
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new JCG.HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random.NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i);
                int      randomI  = Random.Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random, "id", id, Field.Store.NO));
                document.Add(NewTextField(Random, "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
                    docs[i].LinkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs))
                        {
                            context.FromDocuments[linkValue] = fromDocs = new List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs))
                        {
                            context.RandomValueFromDocs[value] = randomValueFromDocs = new List <RandomDoc>();
                        }

                        fromDocs.Add(docs[i]);
                        randomValueFromDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments))
                        {
                            context.ToDocuments[linkValue] = toDocuments = new List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs))
                        {
                            context.RandomValueToDocs[value] = randomValueToDocs = new List <RandomDoc>();
                        }

                        toDocuments.Add(docs[i]);
                        randomValueToDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random.Next(10) == 4)
                {
                    w.Commit();
                }
                if (VERBOSE)
                {
                    Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
            IndexSearcher toSearcher   = NewSearcher(toWriter.GetReader());

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum  docsEnum  = null;
                            TermsEnum termsEnum = null;
                            JCG.SortedSet <BytesRef> joinValues =
                                new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.UnionWith(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetIterator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc();
                                         doc != DocIdSetIterator.NO_MORE_DOCS;
                                         doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
                                                                                  docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousInnerClassHelper6(this, toField, joinValueToJoinScores,
                                                                              docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
Esempio n. 6
0
        /// <summary>
        /// Entry point to the Compile application.
        /// <para/>
        /// This program takes any number of arguments: the first is the name of the
        /// desired stemming algorithm to use (a list is available in the package
        /// description) , all of the rest should be the path or paths to a file or
        /// files containing a stemmer table to compile.
        /// </summary>
        /// <param name="args">the command line arguments</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                return;
            }

            // LUCENENET NOTE: This line does nothing in .NET
            // and also does nothing in Java...what?
            //args[0].ToUpperInvariant();

            // Reads the first char of the first arg
            backward = args[0][0] == '-';
            int  qq        = (backward) ? 1 : 0;
            bool storeorig = false;

            if (args[0][qq] == '0')
            {
                storeorig = true;
                qq++;
            }

            multi = args[0][qq] == 'M';
            if (multi)
            {
                qq++;
            }
            // LUCENENET specific - reformatted with :
            string charset       = SystemProperties.GetProperty("egothor:stemmer:charset", "UTF-8");
            var    stemmerTables = new List <string>();

            // LUCENENET specific
            // command line argument overrides environment variable or default, if supplied
            for (int i = 1; i < args.Length; i++)
            {
                if ("-e".Equals(args[i], StringComparison.Ordinal) || "--encoding".Equals(args[i], StringComparison.Ordinal))
                {
                    charset = args[i];
                }
                else
                {
                    stemmerTables.Add(args[i]);
                }
            }

            char[] optimizer = new char[args[0].Length - qq];
            for (int i = 0; i < optimizer.Length; i++)
            {
                optimizer[i] = args[0][qq + i];
            }

            foreach (var stemmerTable in stemmerTables)
            {
                // System.out.println("[" + args[i] + "]");
                Diff diff = new Diff();
                //int stems = 0; // not used
                int words = 0;


                AllocTrie();

                Console.WriteLine(stemmerTable);
                using (TextReader input = new StreamReader(
                           new FileStream(stemmerTable, FileMode.Open, FileAccess.Read), Encoding.GetEncoding(charset)))
                {
                    string line;
                    while ((line = input.ReadLine()) != null)
                    {
                        try
                        {
                            line = line.ToLowerInvariant();
                            StringTokenizer st = new StringTokenizer(line);
                            st.MoveNext();
                            string stem = st.Current;
                            if (storeorig)
                            {
                                trie.Add(stem, "-a");
                                words++;
                            }
                            while (st.MoveNext())
                            {
                                string token = st.Current;
                                if (token.Equals(stem, StringComparison.Ordinal) == false)
                                {
                                    trie.Add(token, diff.Exec(token, stem));
                                    words++;
                                }
                            }
                        }
                        catch (InvalidOperationException /*x*/)
                        {
                            // no base token (stem) on a line
                        }
                    }
                }

                Optimizer  o  = new Optimizer();
                Optimizer2 o2 = new Optimizer2();
                Lift       l  = new Lift(true);
                Lift       e  = new Lift(false);
                Gener      g  = new Gener();

                for (int j = 0; j < optimizer.Length; j++)
                {
                    string prefix;
                    switch (optimizer[j])
                    {
                    case 'G':
                        trie   = trie.Reduce(g);
                        prefix = "G: ";
                        break;

                    case 'L':
                        trie   = trie.Reduce(l);
                        prefix = "L: ";
                        break;

                    case 'E':
                        trie   = trie.Reduce(e);
                        prefix = "E: ";
                        break;

                    case '2':
                        trie   = trie.Reduce(o2);
                        prefix = "2: ";
                        break;

                    case '1':
                        trie   = trie.Reduce(o);
                        prefix = "1: ";
                        break;

                    default:
                        continue;
                    }
                    trie.PrintInfo(Console.Out, prefix + " ");
                }

                using DataOutputStream os = new DataOutputStream(
                          new FileStream(stemmerTable + ".out", FileMode.OpenOrCreate, FileAccess.Write));
                os.WriteUTF(args[0]);
                trie.Store(os);
            }
        }
Esempio n. 7
0
        public void TestRandom()
        {
            string[]      terms = new string[TestUtil.NextInt32(Random, 2, 10)];
            ISet <string> seen  = new JCG.HashSet <string>();

            while (seen.size() < terms.Length)
            {
                string token = TestUtil.RandomSimpleString(Random, 1, 5);
                if (!seen.contains(token))
                {
                    terms[seen.size()] = token;
                    seen.add(token);
                }
            }

            Analyzer a = new MockAnalyzer(Random);

            int  numDocs   = AtLeast(10);
            long totTokens = 0;

            string[][] docs = new string[numDocs][];
            for (int i = 0; i < numDocs; i++)
            {
                docs[i] = new string[AtLeast(100)];
                if (Verbose)
                {
                    Console.Write("  doc " + i + ":");
                }
                for (int j = 0; j < docs[i].Length; j++)
                {
                    docs[i][j] = GetZipfToken(terms);
                    if (Verbose)
                    {
                        Console.Write(" " + docs[i][j]);
                    }
                }
                if (Verbose)
                {
                    Console.WriteLine();
                }
                totTokens += docs[i].Length;
            }

            int grams = TestUtil.NextInt32(Random, 1, 4);

            if (Verbose)
            {
                Console.WriteLine("TEST: " + terms.Length + " terms; " + numDocs + " docs; " + grams + " grams");
            }

            // Build suggester model:
            FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte)0x20);

            sug.Build(new TestRandomInputEnumerator(docs));

            // Build inefficient but hopefully correct model:
            List <IDictionary <string, int?> > gramCounts = new List <IDictionary <string, int?> >(grams);

            for (int gram = 0; gram < grams; gram++)
            {
                if (Verbose)
                {
                    Console.WriteLine("TEST: build model for gram=" + gram);
                }
                IDictionary <string, int?> model = new JCG.Dictionary <string, int?>();
                gramCounts.Add(model);
                foreach (string[] doc in docs)
                {
                    for (int i = 0; i < doc.Length - gram; i++)
                    {
                        StringBuilder b = new StringBuilder();
                        for (int j = i; j <= i + gram; j++)
                        {
                            if (j > i)
                            {
                                b.append(' ');
                            }
                            b.append(doc[j]);
                        }
                        string token = b.toString();
                        if (!model.TryGetValue(token, out int?curCount) || curCount == null)
                        {
                            model.Put(token, 1);
                        }
                        else
                        {
                            model.Put(token, 1 + curCount);
                        }
                        if (Verbose)
                        {
                            Console.WriteLine("  add '" + token + "' -> count=" + (model.TryGetValue(token, out int?count) ? (count.HasValue ? count.ToString() : "null") : ""));
                        }
                    }
                }
            }

            int lookups = AtLeast(100);

            for (int iter = 0; iter < lookups; iter++)
            {
                string[] tokens = new string[TestUtil.NextInt32(Random, 1, 5)];
                for (int i = 0; i < tokens.Length; i++)
                {
                    tokens[i] = GetZipfToken(terms);
                }

                // Maybe trim last token; be sure not to create the
                // empty string:
                int trimStart;
                if (tokens.Length == 1)
                {
                    trimStart = 1;
                }
                else
                {
                    trimStart = 0;
                }
                int trimAt = TestUtil.NextInt32(Random, trimStart, tokens[tokens.Length - 1].Length);
                tokens[tokens.Length - 1] = tokens[tokens.Length - 1].Substring(0, trimAt - 0);

                int           num = TestUtil.NextInt32(Random, 1, 100);
                StringBuilder b   = new StringBuilder();
                foreach (string token in tokens)
                {
                    b.append(' ');
                    b.append(token);
                }
                string query = b.toString();
                query = query.Substring(1);

                if (Verbose)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " query='" + query + "' num=" + num);
                }

                // Expected:
                List <Lookup.LookupResult> expected = new List <Lookup.LookupResult>();
                double backoff = 1.0;
                seen = new JCG.HashSet <string>();

                if (Verbose)
                {
                    Console.WriteLine("  compute expected");
                }
                for (int i = grams - 1; i >= 0; i--)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("    grams=" + i);
                    }

                    if (tokens.Length < i + 1)
                    {
                        // Don't have enough tokens to use this model
                        if (Verbose)
                        {
                            Console.WriteLine("      skip");
                        }
                        continue;
                    }

                    if (i == 0 && tokens[tokens.Length - 1].Length == 0)
                    {
                        // Never suggest unigrams from empty string:
                        if (Verbose)
                        {
                            Console.WriteLine("      skip unigram priors only");
                        }
                        continue;
                    }

                    // Build up "context" ngram:
                    b = new StringBuilder();
                    for (int j = tokens.Length - i - 1; j < tokens.Length - 1; j++)
                    {
                        b.append(' ');
                        b.append(tokens[j]);
                    }
                    string context = b.toString();
                    if (context.Length > 0)
                    {
                        context = context.Substring(1);
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("      context='" + context + "'");
                    }
                    long contextCount;
                    if (context.Length == 0)
                    {
                        contextCount = totTokens;
                    }
                    else
                    {
                        //int? count = gramCounts.get(i - 1).get(context);
                        var gramCount = gramCounts[i - 1];
                        if (!gramCount.TryGetValue(context, out int?count) || count == null)
                        {
                            // We never saw this context:
                            backoff *= FreeTextSuggester.ALPHA;
                            if (Verbose)
                            {
                                Console.WriteLine("      skip: never saw context");
                            }
                            continue;
                        }
                        contextCount = count.GetValueOrDefault();
                    }
                    if (Verbose)
                    {
                        Console.WriteLine("      contextCount=" + contextCount);
                    }
                    IDictionary <string, int?> model = gramCounts[i];

                    // First pass, gather all predictions for this model:
                    if (Verbose)
                    {
                        Console.WriteLine("      find terms w/ prefix=" + tokens[tokens.Length - 1]);
                    }
                    List <Lookup.LookupResult> tmp = new List <Lookup.LookupResult>();
                    foreach (string term in terms)
                    {
                        if (term.StartsWith(tokens[tokens.Length - 1], StringComparison.Ordinal))
                        {
                            if (Verbose)
                            {
                                Console.WriteLine("        term=" + term);
                            }
                            if (seen.contains(term))
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("          skip seen");
                                }
                                continue;
                            }
                            string ngram = (context + " " + term).Trim();
                            //Integer count = model.get(ngram);
                            if (model.TryGetValue(ngram, out int?count) && count != null)
                            {
                                // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes
                                // return numbers that are greater than long.MaxValue, which results in a negative long number.
                                // This is also the way it is being done in the FreeTextSuggester to work around the issue.
                                Lookup.LookupResult lr = new Lookup.LookupResult(ngram, (long)(long.MaxValue * ((decimal)backoff * (decimal)count / contextCount)));
                                tmp.Add(lr);
                                if (Verbose)
                                {
                                    Console.WriteLine("      add tmp key='" + lr.Key + "' score=" + lr.Value);
                                }
                            }
                        }
                    }

                    // Second pass, trim to only top N, and fold those
                    // into overall suggestions:
                    tmp.Sort(byScoreThenKey);
                    if (tmp.size() > num)
                    {
                        //tmp.subList(num, tmp.size()).clear();
                        tmp.RemoveRange(num, tmp.size() - num); // LUCENENET: Converted end index to length
                    }
                    foreach (Lookup.LookupResult result in tmp)
                    {
                        string key = result.Key.toString();
                        int    idx = key.LastIndexOf(' ');
                        string lastToken;
                        if (idx != -1)
                        {
                            lastToken = key.Substring(idx + 1);
                        }
                        else
                        {
                            lastToken = key;
                        }
                        if (!seen.contains(lastToken))
                        {
                            seen.add(lastToken);
                            expected.Add(result);
                            if (Verbose)
                            {
                                Console.WriteLine("      keep key='" + result.Key + "' score=" + result.Value);
                            }
                        }
                    }

                    backoff *= FreeTextSuggester.ALPHA;
                }

                expected.Sort(byScoreThenKey);

                if (expected.size() > num)
                {
                    expected.RemoveRange(num, expected.size() - num); // LUCENENET: Converted end index to length
                }

                // Actual:
                IList <Lookup.LookupResult> actual = sug.DoLookup(query, num);

                if (Verbose)
                {
                    Console.WriteLine("  expected: " + expected);
                    Console.WriteLine("    actual: " + actual);
                }

                assertEquals(expected.ToString(), actual.ToString());
            }
        }
        public virtual TokenInfoDictionaryWriter BuildDictionary(IList <string> csvFiles)
        {
            TokenInfoDictionaryWriter dictionary = new TokenInfoDictionaryWriter(10 * 1024 * 1024);

            // all lines in the file
            Console.WriteLine("  parse...");
            List <string[]> lines = new List <string[]>(400000);

            foreach (string file in csvFiles)
            {
                using (Stream inputStream = new FileStream(file, FileMode.Open, FileAccess.Read))
                {
                    Encoding   decoder = Encoding.GetEncoding(encoding);
                    TextReader reader  = new StreamReader(inputStream, decoder);

                    string line = null;
                    while ((line = reader.ReadLine()) != null)
                    {
                        string[] entry = CSVUtil.Parse(line);

                        if (entry.Length < 13)
                        {
                            Console.WriteLine("Entry in CSV is not valid: " + line);
                            continue;
                        }

                        string[] formatted = FormatEntry(entry);
                        lines.Add(formatted);

                        // NFKC normalize dictionary entry
                        if (normalizeEntries)
                        {
                            //if (normalizer.isNormalized(entry[0])){
                            if (entry[0].IsNormalized(NormalizationForm.FormKC))
                            {
                                continue;
                            }
                            string[] normalizedEntry = new string[entry.Length];
                            for (int i = 0; i < entry.Length; i++)
                            {
                                //normalizedEntry[i] = normalizer.normalize(entry[i]);
                                normalizedEntry[i] = entry[i].Normalize(NormalizationForm.FormKC);
                            }

                            formatted = FormatEntry(normalizedEntry);
                            lines.Add(formatted);
                        }
                    }
                }
            }

            Console.WriteLine("  sort...");

            // sort by term: we sorted the files already and use a stable sort.
            lines.Sort(new ComparerAnonymousHelper());

            Console.WriteLine("  encode...");

            PositiveInt32Outputs fstOutput  = PositiveInt32Outputs.Singleton;
            Builder <long?>      fstBuilder = new Builder <long?>(Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE2, 0, 0, true, true, int.MaxValue, fstOutput, null, true, PackedInt32s.DEFAULT, true, 15);
            Int32sRef            scratch    = new Int32sRef();
            long   ord       = -1; // first ord will be 0
            string lastValue = null;

            // build tokeninfo dictionary
            foreach (string[] entry in lines)
            {
                int next = dictionary.Put(entry);

                if (next == offset)
                {
                    Console.WriteLine("Failed to process line: " + Collections.ToString(entry));
                    continue;
                }

                string token = entry[0];
                if (!token.Equals(lastValue, StringComparison.Ordinal))
                {
                    // new word to add to fst
                    ord++;
                    lastValue = token;
                    scratch.Grow(token.Length);
                    scratch.Length = token.Length;
                    for (int i = 0; i < token.Length; i++)
                    {
                        scratch.Int32s[i] = (int)token[i];
                    }
                    fstBuilder.Add(scratch, ord);
                }
                dictionary.AddMapping((int)ord, offset);
                offset = next;
            }

            FST <long?> fst = fstBuilder.Finish();

            Console.WriteLine("  " + fst.NodeCount + " nodes, " + fst.ArcCount + " arcs, " + fst.GetSizeInBytes() + " bytes...  ");
            dictionary.SetFST(fst);
            Console.WriteLine(" done");

            return(dictionary);
        }
 public override void Run()
 {
     if (Verbose)
     {
         Console.WriteLine(Thread.CurrentThread.Name + ": launch search thread");
     }
     while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
     {
         try
         {
             IndexSearcher s = outerInstance.GetCurrentSearcher();
             try
             {
                 // Verify 1) IW is correctly setting
                 // diagnostics, and 2) segment warming for
                 // merged segments is actually happening:
                 foreach (AtomicReaderContext sub in s.IndexReader.Leaves)
                 {
                     SegmentReader segReader = (SegmentReader)sub.Reader;
                     IDictionary <string, string> diagnostics = segReader.SegmentInfo.Info.Diagnostics;
                     assertNotNull(diagnostics);
                     diagnostics.TryGetValue("source", out string source);
                     assertNotNull(source);
                     if (source.Equals("merge", StringComparison.Ordinal))
                     {
                         assertTrue("sub reader " + sub + " wasn't warmed: warmed=" + outerInstance.warmed + " diagnostics=" + diagnostics + " si=" + segReader.SegmentInfo,
                                    // LUCENENET: ConditionalWeakTable doesn't have ContainsKey, so we normalize to TryGetValue
                                    !outerInstance.m_assertMergedSegmentsWarmed || outerInstance.warmed.TryGetValue(segReader.core, out BooleanRef _));
                     }
                 }
                 if (s.IndexReader.NumDocs > 0)
                 {
                     outerInstance.SmokeTestSearcher(s);
                     Fields fields = MultiFields.GetFields(s.IndexReader);
                     if (fields == null)
                     {
                         continue;
                     }
                     Terms terms = fields.GetTerms("body");
                     if (terms == null)
                     {
                         continue;
                     }
                     TermsEnum termsEnum     = terms.GetEnumerator();
                     int       seenTermCount = 0;
                     int       shift;
                     int       trigger;
                     if (totTermCount < 30)
                     {
                         shift   = 0;
                         trigger = 1;
                     }
                     else
                     {
                         trigger = totTermCount / 30;
                         shift   = Random.Next(trigger);
                     }
                     while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTimeMS) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                     {
                         if (!termsEnum.MoveNext())
                         {
                             totTermCount.Value = seenTermCount;
                             break;
                         }
                         seenTermCount++;
                         // search 30 terms
                         if ((seenTermCount + shift) % trigger == 0)
                         {
                             //if (VERBOSE) {
                             //System.out.println(Thread.currentThread().getName() + " now search body:" + term.Utf8ToString());
                             //}
                             totHits.AddAndGet(outerInstance.RunQuery(s, new TermQuery(new Term("body", termsEnum.Term))));
                         }
                     }
                     //if (VERBOSE) {
                     //System.out.println(Thread.currentThread().getName() + ": search done");
                     //}
                 }
             }
             finally
             {
                 outerInstance.ReleaseSearcher(s);
             }
         }
         catch (Exception t) when(t.IsThrowable())
         {
             Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
             outerInstance.m_failed.Value = (true);
             Console.WriteLine(t.ToString());
             throw RuntimeException.Create(t);
         }
     }
 }
Esempio n. 10
0
        public void TestRandom()
        {
            int numberOfRuns = TestUtil.NextInt32(Random, 3, 6);

            for (int iter = 0; iter < numberOfRuns; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine(string.Format("TEST: iter={0} total={1}", iter, numberOfRuns));
                }

                int numDocs   = TestUtil.NextInt32(Random, 100, 1000) * RANDOM_MULTIPLIER;
                int numGroups = TestUtil.NextInt32(Random, 1, numDocs);

                if (VERBOSE)
                {
                    Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
                }

                List <BytesRef> groups = new List <BytesRef>();
                for (int i = 0; i < numGroups; i++)
                {
                    string randomValue;
                    do
                    {
                        // B/c of DV based impl we can't see the difference between an empty string and a null value.
                        // For that reason we don't generate empty string groups.
                        randomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    } while ("".Equals(randomValue, StringComparison.Ordinal));
                    groups.Add(new BytesRef(randomValue));
                }
                string[] contentStrings = new string[TestUtil.NextInt32(Random, 2, 20)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: create fake content");
                }
                for (int contentIDX = 0; contentIDX < contentStrings.Length; contentIDX++)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.append("real").append(Random.nextInt(3)).append(' ');
                    int fakeCount = Random.nextInt(10);
                    for (int fakeIDX = 0; fakeIDX < fakeCount; fakeIDX++)
                    {
                        sb.append("fake ");
                    }
                    contentStrings[contentIDX] = sb.toString();
                    if (VERBOSE)
                    {
                        Console.WriteLine("  content=" + sb.toString());
                    }
                }

                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(
                    Random,
                    dir,
                    NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                         new MockAnalyzer(Random)));
                bool          preFlex   = "Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
                bool          canUseIDV = !preFlex;
                DocValuesType valueType = vts[Random.nextInt(vts.Length)];

                Document doc        = new Document();
                Document docNoGroup = new Document();
                Field    group      = NewStringField("group", "", Field.Store.NO);
                doc.Add(group);
                Field valuesField = null;
                if (canUseIDV)
                {
                    switch (valueType)
                    {
                    case DocValuesType.BINARY:
                        valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
                        break;

                    case DocValuesType.SORTED:
                        valuesField = new SortedDocValuesField("group_dv", new BytesRef());
                        break;

                    default:
                        fail("unhandled type");
                        break;
                    }
                    doc.Add(valuesField);
                }
                Field sort1 = NewStringField("sort1", "", Field.Store.NO);
                doc.Add(sort1);
                docNoGroup.Add(sort1);
                Field sort2 = NewStringField("sort2", "", Field.Store.NO);
                doc.Add(sort2);
                docNoGroup.Add(sort2);
                Field sort3 = NewStringField("sort3", "", Field.Store.NO);
                doc.Add(sort3);
                docNoGroup.Add(sort3);
                Field content = NewTextField("content", "", Field.Store.NO);
                doc.Add(content);
                docNoGroup.Add(content);
                Int32Field id = new Int32Field("id", 0, Field.Store.NO);
                doc.Add(id);
                docNoGroup.Add(id);
                GroupDoc[] groupDocs = new GroupDoc[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    BytesRef groupValue;
                    if (Random.nextInt(24) == 17)
                    {
                        // So we test the "doc doesn't have the group'd
                        // field" case:
                        groupValue = null;
                    }
                    else
                    {
                        groupValue = groups[Random.nextInt(groups.size())];
                    }

                    GroupDoc groupDoc = new GroupDoc(
                        i,
                        groupValue,
                        groups[Random.nextInt(groups.size())],
                        groups[Random.nextInt(groups.size())],
                        new BytesRef(string.Format(CultureInfo.InvariantCulture, "{0:D5}", i)),
                        contentStrings[Random.nextInt(contentStrings.Length)]
                        );

                    if (VERBOSE)
                    {
                        Console.WriteLine("  doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.Utf8ToString()) + " sort1=" + groupDoc.sort1.Utf8ToString() + " sort2=" + groupDoc.sort2.Utf8ToString() + " sort3=" + groupDoc.sort3.Utf8ToString());
                    }

                    groupDocs[i] = groupDoc;
                    if (groupDoc.group != null)
                    {
                        group.SetStringValue(groupDoc.group.Utf8ToString());
                        if (canUseIDV)
                        {
                            valuesField.SetBytesValue(new BytesRef(groupDoc.group.Utf8ToString()));
                        }
                    }
                    sort1.SetStringValue(groupDoc.sort1.Utf8ToString());
                    sort2.SetStringValue(groupDoc.sort2.Utf8ToString());
                    sort3.SetStringValue(groupDoc.sort3.Utf8ToString());
                    content.SetStringValue(groupDoc.content);
                    id.SetInt32Value(groupDoc.id);
                    if (groupDoc.group == null)
                    {
                        w.AddDocument(docNoGroup);
                    }
                    else
                    {
                        w.AddDocument(doc);
                    }
                }

                DirectoryReader r = w.GetReader();
                w.Dispose();

                // NOTE: intentional but temporary field cache insanity!
                FieldCache.Int32s docIdToFieldId = FieldCache.DEFAULT.GetInt32s(SlowCompositeReaderWrapper.Wrap(r), "id", false);
                int[]             fieldIdToDocID = new int[numDocs];
                for (int i = 0; i < numDocs; i++)
                {
                    int fieldId = docIdToFieldId.Get(i);
                    fieldIdToDocID[fieldId] = i;
                }

                try
                {
                    IndexSearcher s = NewSearcher(r);
                    if (typeof(SlowCompositeReaderWrapper).GetTypeInfo().IsAssignableFrom(s.IndexReader.GetType()))
                    {
                        canUseIDV = false;
                    }
                    else
                    {
                        canUseIDV = !preFlex;
                    }

                    for (int contentID = 0; contentID < 3; contentID++)
                    {
                        ScoreDoc[] hits = s.Search(new TermQuery(new Term("content", "real" + contentID)), numDocs).ScoreDocs;
                        foreach (ScoreDoc hit in hits)
                        {
                            GroupDoc gd = groupDocs[docIdToFieldId.Get(hit.Doc)];
                            assertTrue(gd.score == 0.0);
                            gd.score = hit.Score;
                            int docId = gd.id;
                            assertEquals(docId, docIdToFieldId.Get(hit.Doc));
                        }
                    }

                    foreach (GroupDoc gd in groupDocs)
                    {
                        assertTrue(gd.score != 0.0);
                    }

                    for (int searchIter = 0; searchIter < 100; searchIter++)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: searchIter=" + searchIter);
                        }

                        string searchTerm      = "real" + Random.nextInt(3);
                        bool   sortByScoreOnly = Random.nextBoolean();
                        Sort   sortWithinGroup = GetRandomSort(sortByScoreOnly);
                        AbstractAllGroupHeadsCollector allGroupHeadsCollector = CreateRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
                        s.Search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
                        int[] expectedGroupHeads = CreateExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
                        int[] actualGroupHeads   = allGroupHeadsCollector.RetrieveGroupHeads();
                        // The actual group heads contains Lucene ids. Need to change them into our id value.
                        for (int i = 0; i < actualGroupHeads.Length; i++)
                        {
                            actualGroupHeads[i] = docIdToFieldId.Get(actualGroupHeads[i]);
                        }
                        // Allows us the easily iterate and assert the actual and expected results.
                        Array.Sort(expectedGroupHeads);
                        Array.Sort(actualGroupHeads);

                        if (VERBOSE)
                        {
                            Console.WriteLine("Collector: " + allGroupHeadsCollector.GetType().Name);
                            Console.WriteLine("Sort within group: " + sortWithinGroup);
                            Console.WriteLine("Num group: " + numGroups);
                            Console.WriteLine("Num doc: " + numDocs);
                            Console.WriteLine("\n=== Expected: \n");
                            foreach (int expectedDocId in expectedGroupHeads)
                            {
                                GroupDoc expectedGroupDoc = groupDocs[expectedDocId];
                                string   expectedGroup    = expectedGroupDoc.group == null ? null : expectedGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  expectedGroup, expectedGroupDoc.score, expectedGroupDoc.sort1.Utf8ToString(),
                                                  expectedGroupDoc.sort2.Utf8ToString(), expectedGroupDoc.sort3.Utf8ToString(), expectedDocId)
                                    );
                            }
                            Console.WriteLine("\n=== Actual: \n");
                            foreach (int actualDocId in actualGroupHeads)
                            {
                                GroupDoc actualGroupDoc = groupDocs[actualDocId];
                                string   actualGroup    = actualGroupDoc.group == null ? null : actualGroupDoc.group.Utf8ToString();
                                Console.WriteLine(
                                    string.Format(CultureInfo.InvariantCulture,
                                                  "Group:{0,10} score{1:0.0#######,5} Sort1:{2,10} Sort2:{3,10} Sort3:{4,10} doc:{5,10}",
                                                  actualGroup, actualGroupDoc.score, actualGroupDoc.sort1.Utf8ToString(),
                                                  actualGroupDoc.sort2.Utf8ToString(), actualGroupDoc.sort3.Utf8ToString(), actualDocId)
                                    );
                            }
                            Console.WriteLine("\n===================================================================================");
                        }

                        assertArrayEquals(expectedGroupHeads, actualGroupHeads);
                    }
                }
                finally
                {
                    QueryUtils.PurgeFieldCache(r);
                }

                r.Dispose();
                dir.Dispose();
            }
        }
            public override void Run()
            {
                // TODO: would be better if this were cross thread, so that we make sure one thread deleting anothers added docs works:
                IList <string>  toDeleteIDs     = new JCG.List <string>();
                IList <SubDocs> toDeleteSubDocs = new JCG.List <SubDocs>();

                while (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond < stopTime && !outerInstance.m_failed) // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                {
                    try
                    {
                        // Occasional longish pause if running
                        // nightly
                        if (LuceneTestCase.TestNightly && Random.Next(6) == 3)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": now long sleep");
                            }
                            //Thread.Sleep(TestUtil.NextInt32(Random, 50, 500));
                            // LUCENENET specific - Reduced amount of pause to keep the total
                            // Nightly test time under 1 hour
                            Thread.Sleep(TestUtil.NextInt32(Random, 50, 250));
                        }

                        // Rate limit ingest rate:
                        if (Random.Next(7) == 5)
                        {
                            Thread.Sleep(TestUtil.NextInt32(Random, 1, 10));
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": done sleep");
                            }
                        }

                        Document doc = docs.NextDoc();
                        if (doc == null)
                        {
                            break;
                        }

                        // Maybe add randomly named field
                        string addedField;
                        if (Random.NextBoolean())
                        {
                            addedField = "extra" + Random.Next(40);
                            doc.Add(NewTextField(addedField, "a random field", Field.Store.YES));
                        }
                        else
                        {
                            addedField = null;
                        }

                        if (Random.NextBoolean())
                        {
                            if (Random.NextBoolean())
                            {
                                // Add/update doc block:
                                string  packID;
                                SubDocs delSubDocs;
                                if (toDeleteSubDocs.Count > 0 && Random.NextBoolean())
                                {
                                    delSubDocs = toDeleteSubDocs[Random.Next(toDeleteSubDocs.Count)];
                                    if (Debugging.AssertsEnabled)
                                    {
                                        Debugging.Assert(!delSubDocs.Deleted);
                                    }
                                    toDeleteSubDocs.Remove(delSubDocs);
                                    // Update doc block, replacing prior packID
                                    packID = delSubDocs.PackID;
                                }
                                else
                                {
                                    delSubDocs = null;
                                    // Add doc block, using new packID
                                    packID = outerInstance.m_packCount.GetAndIncrement().ToString(CultureInfo.InvariantCulture);
                                }

                                Field            packIDField = NewStringField("packID", packID, Field.Store.YES);
                                IList <string>   docIDs      = new JCG.List <string>();
                                SubDocs          subDocs     = new SubDocs(packID, docIDs);
                                IList <Document> docsList    = new JCG.List <Document>();

                                allSubDocs.Enqueue(subDocs);
                                doc.Add(packIDField);
                                docsList.Add(TestUtil.CloneDocument(doc));
                                docIDs.Add(doc.Get("docid"));

                                int maxDocCount = TestUtil.NextInt32(Random, 1, 10);
                                while (docsList.Count < maxDocCount)
                                {
                                    doc = docs.NextDoc();
                                    if (doc == null)
                                    {
                                        break;
                                    }
                                    docsList.Add(TestUtil.CloneDocument(doc));
                                    docIDs.Add(doc.Get("docid"));
                                }
                                outerInstance.m_addCount.AddAndGet(docsList.Count);

                                Term packIDTerm = new Term("packID", packID);

                                if (delSubDocs != null)
                                {
                                    delSubDocs.Deleted = true;
                                    delIDs.UnionWith(delSubDocs.SubIDs);
                                    outerInstance.m_delCount.AddAndGet(delSubDocs.SubIDs.Count);
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": update pack packID=" + delSubDocs.PackID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.UpdateDocuments(packIDTerm, docsList);
                                }
                                else
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": add pack packID=" + packID +
                                                          " count=" + docsList.Count + " docs=" + string.Format(J2N.Text.StringFormatter.InvariantCulture, "{0}", docIDs));
                                    }
                                    outerInstance.AddDocuments(packIDTerm, docsList);
                                }
                                doc.RemoveField("packID");

                                if (Random.Next(5) == 2)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + packID);
                                    }
                                    toDeleteSubDocs.Add(subDocs);
                                }
                            }
                            else
                            {
                                // Add single doc
                                string docid = doc.Get("docid");
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": add doc docid:" + docid);
                                }
                                outerInstance.AddDocument(new Term("docid", docid), doc);
                                outerInstance.m_addCount.GetAndIncrement();

                                if (Random.Next(5) == 3)
                                {
                                    if (Verbose)
                                    {
                                        Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                    }
                                    toDeleteIDs.Add(docid);
                                }
                            }
                        }
                        else
                        {
                            // Update single doc, but we never re-use
                            // and ID so the delete will never
                            // actually happen:
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": update doc id:" + doc.Get("docid"));
                            }
                            string docid = doc.Get("docid");
                            outerInstance.UpdateDocument(new Term("docid", docid), doc);
                            outerInstance.m_addCount.GetAndIncrement();

                            if (Random.Next(5) == 3)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": buffer del id:" + doc.Get("docid"));
                                }
                                toDeleteIDs.Add(docid);
                            }
                        }

                        if (Random.Next(30) == 17)
                        {
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": apply " + toDeleteIDs.Count + " deletes");
                            }
                            foreach (string id in toDeleteIDs)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del term=id:" + id);
                                }
                                outerInstance.DeleteDocuments(new Term("docid", id));
                            }
                            int count = outerInstance.m_delCount.AddAndGet(toDeleteIDs.Count);
                            if (Verbose)
                            {
                                Console.WriteLine(Thread.CurrentThread.Name + ": tot " + count + " deletes");
                            }
                            delIDs.UnionWith(toDeleteIDs);
                            toDeleteIDs.Clear();

                            foreach (SubDocs subDocs in toDeleteSubDocs)
                            {
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(!subDocs.Deleted);
                                }
                                delPackIDs.Add(subDocs.PackID);
                                outerInstance.DeleteDocuments(new Term("packID", subDocs.PackID));
                                subDocs.Deleted = true;
                                if (Verbose)
                                {
                                    Console.WriteLine(Thread.CurrentThread.Name + ": del subs: " + subDocs.SubIDs + " packID=" + subDocs.PackID);
                                }
                                delIDs.UnionWith(subDocs.SubIDs);
                                outerInstance.m_delCount.AddAndGet(subDocs.SubIDs.Count);
                            }
                            toDeleteSubDocs.Clear();
                        }
                        if (addedField != null)
                        {
                            doc.RemoveField(addedField);
                        }
                    }
                    catch (Exception t) when(t.IsThrowable())
                    {
                        Console.WriteLine(Thread.CurrentThread.Name + ": hit exc");
                        Console.WriteLine(t.ToString());
                        Console.Write(t.StackTrace);
                        outerInstance.m_failed.Value = (true);
                        throw RuntimeException.Create(t);
                    }
                }
                if (Verbose)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing done");
                }

                outerInstance.DoAfterIndexingThreadDone();
            }
Esempio n. 12
0
            public virtual void IndexDoc()
            {
                Document d = new Document();

                FieldType customType1 = new FieldType(TextField.TYPE_STORED);

                customType1.IsTokenized = false;
                customType1.OmitNorms   = true;

                List <Field> fields   = new List <Field>();
                string       idString = IdString;
                Field        idField  = NewField("id", idString, customType1);

                fields.Add(idField);

                int nFields = NextInt(MaxFields);

                for (int i = 0; i < nFields; i++)
                {
                    FieldType customType = new FieldType();
                    switch (NextInt(4))
                    {
                    case 0:
                        break;

                    case 1:
                        customType.StoreTermVectors = true;
                        break;

                    case 2:
                        customType.StoreTermVectors         = true;
                        customType.StoreTermVectorPositions = true;
                        break;

                    case 3:
                        customType.StoreTermVectors       = true;
                        customType.StoreTermVectorOffsets = true;
                        break;
                    }

                    switch (NextInt(4))
                    {
                    case 0:
                        customType.IsStored  = true;
                        customType.OmitNorms = true;
                        customType.IsIndexed = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(1), customType));
                        break;

                    case 1:
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 2:
                        customType.IsStored                 = true;
                        customType.StoreTermVectors         = false;
                        customType.StoreTermVectorOffsets   = false;
                        customType.StoreTermVectorPositions = false;
                        fields.Add(NewField("f" + NextInt(100), GetString(0), customType));
                        break;

                    case 3:
                        customType.IsStored    = true;
                        customType.IsIndexed   = true;
                        customType.IsTokenized = true;
                        fields.Add(NewField("f" + NextInt(100), GetString(BigFieldSize), customType));
                        break;
                    }
                }

                if (SameFieldOrder)
                {
                    fields.Sort(fieldNameComparer);
                }
                else
                {
                    // random placement of id field also
                    fields.Swap(NextInt(fields.Count), 0);
                }

                for (int i = 0; i < fields.Count; i++)
                {
                    d.Add(fields[i]);
                }
                if (VERBOSE)
                {
                    Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString);
                }
                w.UpdateDocument(new Term("id", idString), d);
                //System.out.println(Thread.currentThread().getName() + ": indexing "+d);
                Docs[idString] = d;
            }
Esempio n. 13
0
        public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nr1 docs:");
                PrintDocs(r1);
                Console.WriteLine("\nr2 docs:");
                PrintDocs(r2);
            }
            if (r1.NumDocs != r2.NumDocs)
            {
                Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
            }
            bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

            int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

            // create mapping from id2 space to id2 based on idField
            Fields f1 = MultiFields.GetFields(r1);

            if (f1 == null)
            {
                // make sure r2 is empty
                Assert.IsNull(MultiFields.GetFields(r2));
                return;
            }
            Terms terms1 = f1.GetTerms(idField);

            if (terms1 == null)
            {
                Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
                return;
            }
            TermsEnum termsEnum = terms1.GetIterator(null);

            IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
            IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

            Fields fields = MultiFields.GetFields(r2);

            if (fields == null)
            {
                // make sure r1 is in fact empty (eg has only all
                // deleted docs):
                IBits    liveDocs = MultiFields.GetLiveDocs(r1);
                DocsEnum docs     = null;
                while (termsEnum.Next() != null)
                {
                    docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
                    while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        Assert.Fail("r1 is not empty but r2 is");
                    }
                }
                return;
            }
            Terms     terms2     = fields.GetTerms(idField);
            TermsEnum termsEnum2 = terms2.GetIterator(null);

            DocsEnum termDocs1 = null;
            DocsEnum termDocs2 = null;

            while (true)
            {
                BytesRef term = termsEnum.Next();
                //System.out.println("TEST: match id term=" + term);
                if (term == null)
                {
                    break;
                }

                termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
                if (termsEnum2.SeekExact(term))
                {
                    termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
                }
                else
                {
                    termDocs2 = null;
                }

                if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                {
                    // this doc is deleted and wasn't replaced
                    Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
                    continue;
                }

                int id1 = termDocs1.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

                Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                int id2 = termDocs2.DocID;
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

                r2r1[id2] = id1;

                // verify stored fields are equivalent
                try
                {
                    VerifyEquals(r1.Document(id1), r2.Document(id2));
                }
                catch (Exception /*t*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
                    Console.WriteLine("  d1=" + r1.Document(id1));
                    Console.WriteLine("  d2=" + r2.Document(id2));
                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }

                try
                {
                    // verify term vectors are equivalent
                    VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
                }
                catch (Exception /*e*/)
                {
                    Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
                    Fields tv1 = r1.GetTermVectors(id1);
                    Console.WriteLine("  d1=" + tv1);
                    if (tv1 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv1)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv1.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    Fields tv2 = r2.GetTermVectors(id2);
                    Console.WriteLine("  d2=" + tv2);
                    if (tv2 != null)
                    {
                        DocsAndPositionsEnum dpEnum = null;
                        DocsEnum             dEnum  = null;
                        foreach (string field in tv2)
                        {
                            Console.WriteLine("    " + field + ":");
                            Terms terms3 = tv2.GetTerms(field);
                            Assert.IsNotNull(terms3);
                            TermsEnum termsEnum3 = terms3.GetIterator(null);
                            BytesRef  term2;
                            while ((term2 = termsEnum3.Next()) != null)
                            {
                                Console.WriteLine("      " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                                dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                                if (dpEnum != null)
                                {
                                    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dpEnum.Freq;
                                    Console.WriteLine("        doc=" + dpEnum.DocID + " freq=" + freq);
                                    for (int posUpto = 0; posUpto < freq; posUpto++)
                                    {
                                        Console.WriteLine("          pos=" + dpEnum.NextPosition());
                                    }
                                }
                                else
                                {
                                    dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                                    Assert.IsNotNull(dEnum);
                                    Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                                    int freq = dEnum.Freq;
                                    Console.WriteLine("        doc=" + dEnum.DocID + " freq=" + freq);
                                }
                            }
                        }
                    }

                    throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
                }
            }

            //System.out.println("TEST: done match id");

            // Verify postings
            //System.out.println("TEST: create te1");
            Fields fields1 = MultiFields.GetFields(r1);
            IEnumerator <string> fields1Enum = fields1.GetEnumerator();
            Fields fields2 = MultiFields.GetFields(r2);
            IEnumerator <string> fields2Enum = fields2.GetEnumerator();

            string    field1 = null, field2 = null;
            TermsEnum termsEnum1 = null;

            termsEnum2 = null;
            DocsEnum docs1 = null, docs2 = null;

            // pack both doc and freq into single element for easy sorting
            long[] info1 = new long[r1.NumDocs];
            long[] info2 = new long[r2.NumDocs];

            for (; ;)
            {
                BytesRef term1 = null, term2 = null;

                // iterate until we get some docs
                int len1;
                for (; ;)
                {
                    len1 = 0;
                    if (termsEnum1 == null)
                    {
                        if (!fields1Enum.MoveNext())
                        {
                            break;
                        }
                        field1 = fields1Enum.Current;
                        Terms terms = fields1.GetTerms(field1);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum1 = terms.GetIterator(null);
                    }
                    term1 = termsEnum1.Next();
                    if (term1 == null)
                    {
                        // no more terms in this field
                        termsEnum1 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
                    while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = docs1.DocID;
                        int f = docs1.Freq;
                        info1[len1] = (((long)d) << 32) | (uint)f;
                        len1++;
                    }
                    if (len1 > 0)
                    {
                        break;
                    }
                }

                // iterate until we get some docs
                int len2;
                for (; ;)
                {
                    len2 = 0;
                    if (termsEnum2 == null)
                    {
                        if (!fields2Enum.MoveNext())
                        {
                            break;
                        }
                        field2 = fields2Enum.Current;
                        Terms terms = fields2.GetTerms(field2);
                        if (terms == null)
                        {
                            continue;
                        }
                        termsEnum2 = terms.GetIterator(null);
                    }
                    term2 = termsEnum2.Next();
                    if (term2 == null)
                    {
                        // no more terms in this field
                        termsEnum2 = null;
                        continue;
                    }

                    //System.out.println("TEST: term1=" + term1);
                    docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
                    while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        int d = r2r1[docs2.DocID];
                        int f = docs2.Freq;
                        info2[len2] = (((long)d) << 32) | (uint)f;
                        len2++;
                    }
                    if (len2 > 0)
                    {
                        break;
                    }
                }

                Assert.AreEqual(len1, len2);
                if (len1 == 0) // no more terms
                {
                    break;
                }

                Assert.AreEqual(field1, field2);
                Assert.IsTrue(term1.BytesEquals(term2));

                if (!hasDeletes)
                {
                    Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
                }

                Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

                // sort info2 to get it into ascending docid
                Array.Sort(info2, 0, len2);

                // now compare
                for (int i = 0; i < len1; i++)
                {
                    Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
                }
            }
        }
Esempio n. 14
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            ANALYZER = new MockAnalyzer(Random);

            qp = new StandardQueryParser(ANALYZER);

            IDictionary <string, /*Number*/ object> randomNumberMap = new JCG.Dictionary <string, object>();

            /*SimpleDateFormat*/
            string dateFormat;
            long   randomDate;
            bool   dateFormatSanityCheckPass;
            int    count = 0;

            do
            {
                if (count > 100)
                {
                    fail("This test has problems to find a sane random DateFormat/NumberFormat. Stopped trying after 100 iterations.");
                }

                dateFormatSanityCheckPass = true;
                LOCALE     = RandomCulture(Random);
                TIMEZONE   = RandomTimeZone(Random);
                DATE_STYLE = randomDateStyle(Random);
                TIME_STYLE = randomDateStyle(Random);

                //// assumes localized date pattern will have at least year, month, day,
                //// hour, minute
                //dateFormat = (SimpleDateFormat)DateFormat.getDateTimeInstance(
                //    DATE_STYLE, TIME_STYLE, LOCALE);

                //// not all date patterns includes era, full year, timezone and second,
                //// so we add them here
                //dateFormat.applyPattern(dateFormat.toPattern() + " G s Z yyyy");
                //dateFormat.setTimeZone(TIMEZONE);

                // assumes localized date pattern will have at least year, month, day,
                // hour, minute
                DATE_FORMAT = new NumberDateFormat(DATE_STYLE, TIME_STYLE, LOCALE)
                {
                    TimeZone = TIMEZONE
                };

                // not all date patterns includes era, full year, timezone and second,
                // so we add them here
                DATE_FORMAT.SetDateFormat(DATE_FORMAT.GetDateFormat() + " g s z yyyy");


                dateFormat = DATE_FORMAT.GetDateFormat();

                do
                {
                    randomDate = Random.nextLong();

                    // prune date value so it doesn't pass in insane values to some
                    // calendars.
                    randomDate = randomDate % 3400000000000L;

                    // truncate to second
                    randomDate = (randomDate / 1000L) * 1000L;

                    // only positive values
                    randomDate = Math.Abs(randomDate);
                } while (randomDate == 0L);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, randomDate);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, 0);

                dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat,
                                                                   -randomDate);

                count++;
            } while (!dateFormatSanityCheckPass);

            //NUMBER_FORMAT = NumberFormat.getNumberInstance(LOCALE);
            //NUMBER_FORMAT.setMaximumFractionDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMinimumFractionDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMaximumIntegerDigits((Random().nextInt() & 20) + 1);
            //NUMBER_FORMAT.setMinimumIntegerDigits((Random().nextInt() & 20) + 1);

            NUMBER_FORMAT = new NumberFormat(LOCALE);

            double randomDouble;
            long   randomLong;
            int    randomInt;
            float  randomFloat;

            while ((randomLong = Convert.ToInt64(NormalizeNumber(Math.Abs(Random.nextLong()))
                                                 )) == 0L)
            {
                ;
            }
            while ((randomDouble = Convert.ToDouble(NormalizeNumber(Math.Abs(Random.NextDouble()))
                                                    )) == 0.0)
            {
                ;
            }
            while ((randomFloat = Convert.ToSingle(NormalizeNumber(Math.Abs(Random.nextFloat()))
                                                   )) == 0.0f)
            {
                ;
            }
            while ((randomInt = Convert.ToInt32(NormalizeNumber(Math.Abs(Random.nextInt())))) == 0)
            {
                ;
            }

            randomNumberMap.Put(NumericType.INT64.ToString(), randomLong);
            randomNumberMap.Put(NumericType.INT32.ToString(), randomInt);
            randomNumberMap.Put(NumericType.SINGLE.ToString(), randomFloat);
            randomNumberMap.Put(NumericType.DOUBLE.ToString(), randomDouble);
            randomNumberMap.Put(DATE_FIELD_NAME, randomDate);

            RANDOM_NUMBER_MAP = randomNumberMap.AsReadOnly();

            directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, directory,
                                                             NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                                             .SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000))
                                                             .SetMergePolicy(NewLogMergePolicy()));

            Document doc = new Document();
            IDictionary <String, NumericConfig> numericConfigMap = new JCG.Dictionary <String, NumericConfig>();
            IDictionary <String, Field>         numericFieldMap  = new JCG.Dictionary <String, Field>();

            qp.NumericConfigMap = (numericConfigMap);

            foreach (NumericType type in Enum.GetValues(typeof(NumericType)))
            {
                if (type == NumericType.NONE)
                {
                    continue;
                }

                numericConfigMap.Put(type.ToString(), new NumericConfig(PRECISION_STEP,
                                                                        NUMBER_FORMAT, type));

                FieldType ft2 = new FieldType(Int32Field.TYPE_NOT_STORED);
                ft2.NumericType          = (type);
                ft2.IsStored             = (true);
                ft2.NumericPrecisionStep = (PRECISION_STEP);
                ft2.Freeze();
                Field field;

                switch (type)
                {
                case NumericType.INT32:
                    field = new Int32Field(type.ToString(), 0, ft2);
                    break;

                case NumericType.SINGLE:
                    field = new SingleField(type.ToString(), 0.0f, ft2);
                    break;

                case NumericType.INT64:
                    field = new Int64Field(type.ToString(), 0L, ft2);
                    break;

                case NumericType.DOUBLE:
                    field = new DoubleField(type.ToString(), 0.0, ft2);
                    break;

                default:
                    fail();
                    field = null;
                    break;
                }
                numericFieldMap.Put(type.ToString(), field);
                doc.Add(field);
            }

            numericConfigMap.Put(DATE_FIELD_NAME, new NumericConfig(PRECISION_STEP,
                                                                    DATE_FORMAT, NumericType.INT64));
            FieldType ft = new FieldType(Int64Field.TYPE_NOT_STORED);

            ft.IsStored             = (true);
            ft.NumericPrecisionStep = (PRECISION_STEP);
            Int64Field dateField = new Int64Field(DATE_FIELD_NAME, 0L, ft);

            numericFieldMap.Put(DATE_FIELD_NAME, dateField);
            doc.Add(dateField);

            foreach (NumberType numberType in Enum.GetValues(typeof(NumberType)))
            {
                setFieldValues(numberType, numericFieldMap);
                if (VERBOSE)
                {
                    Console.WriteLine("Indexing document: " + doc);
                }
                writer.AddDocument(doc);
            }

            reader   = writer.GetReader();
            searcher = NewSearcher(reader);
            writer.Dispose();
        }
Esempio n. 15
0
        private void DoTestSeekDoesNotExist(Random r, int numField, IList <Term> fieldTerms, Term[] fieldTermsArray, IndexReader reader)
        {
            IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>();

            if (Verbose)
            {
                Console.WriteLine("TEST: top random seeks");
            }

            {
                int num = AtLeast(100);
                for (int iter = 0; iter < num; iter++)
                {
                    // seek to random spot
                    string field = ("f" + r.Next(numField)).Intern();
                    Term   tx    = new Term(field, GetRandomString(r));

                    int spot = Array.BinarySearch(fieldTermsArray, tx);

                    if (spot < 0)
                    {
                        if (Verbose)
                        {
                            Console.WriteLine("TEST: non-exist seek to " + field + ":" + UnicodeUtil.ToHexString(tx.Text()));
                        }

                        // term does not exist:
                        TermsEnum te;
                        if (!tes.TryGetValue(field, out te))
                        {
                            te         = MultiFields.GetTerms(reader, field).GetIterator(null);
                            tes[field] = te;
                        }

                        if (Verbose)
                        {
                            Console.WriteLine("  got enum");
                        }

                        spot = -spot - 1;

                        if (spot == fieldTerms.Count || !fieldTerms[spot].Field.Equals(field, StringComparison.Ordinal))
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.END, te.SeekCeil(tx.Bytes));
                        }
                        else
                        {
                            Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, te.SeekCeil(tx.Bytes));

                            if (Verbose)
                            {
                                Console.WriteLine("  got term=" + UnicodeUtil.ToHexString(te.Term.Utf8ToString()));
                                Console.WriteLine("  exp term=" + UnicodeUtil.ToHexString(fieldTerms[spot].Text()));
                            }

                            Assert.AreEqual(fieldTerms[spot].Bytes, te.Term);

                            // now .next() this many times:
                            int ct = TestUtil.NextInt32(r, 5, 100);
                            for (int i = 0; i < ct; i++)
                            {
                                if (Verbose)
                                {
                                    Console.WriteLine("TEST: now next()");
                                }
                                if (1 + spot + i >= fieldTerms.Count)
                                {
                                    break;
                                }
                                Term term = fieldTerms[1 + spot + i];
                                if (!term.Field.Equals(field, StringComparison.Ordinal))
                                {
                                    Assert.IsNull(te.Next());
                                    break;
                                }
                                else
                                {
                                    BytesRef t = te.Next();

                                    if (Verbose)
                                    {
                                        Console.WriteLine("  got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString())));
                                        Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(term.Text().ToString()));
                                    }

                                    Assert.AreEqual(term.Bytes, t);
                                }
                            }
                        }
                    }
                }
            }
        }
Esempio n. 16
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            IEnumerable <Type> analysisClasses = typeof(StandardAnalyzer).Assembly.GetTypes()
                                                 .Where(c => {
                var typeInfo = c;

                return(!typeInfo.IsAbstract && typeInfo.IsPublic && !typeInfo.IsInterface &&
                       typeInfo.IsClass && (typeInfo.GetCustomAttribute <ObsoleteAttribute>() == null) &&
                       (typeInfo.IsSubclassOf(typeof(Tokenizer)) || typeInfo.IsSubclassOf(typeof(TokenFilter)) || typeInfo.IsSubclassOf(typeof(CharFilter))));
            })
                                                 .ToArray();

            tokenizers   = new List <ConstructorInfo>();
            tokenfilters = new List <ConstructorInfo>();
            charfilters  = new List <ConstructorInfo>();
            foreach (Type c in analysisClasses)
            {
                foreach (ConstructorInfo ctor in c.GetConstructors())
                {
                    if (ctor.GetCustomAttribute <ObsoleteAttribute>() != null || (brokenConstructors.ContainsKey(ctor) && brokenConstructors[ctor] == ALWAYS))
                    {
                        continue;
                    }

                    var typeInfo = c;

                    if (typeInfo.IsSubclassOf(typeof(Tokenizer)))
                    {
                        assertTrue(ctor.ToString() + " has unsupported parameter types",
                                   allowedTokenizerArgs.containsAll(ctor.GetParameters().Select(p => p.ParameterType).ToArray()));
                        tokenizers.Add(ctor);
                    }
                    else if (typeInfo.IsSubclassOf(typeof(TokenFilter)))
                    {
                        assertTrue(ctor.ToString() + " has unsupported parameter types",
                                   allowedTokenFilterArgs.containsAll(ctor.GetParameters().Select(p => p.ParameterType).ToArray()));
                        tokenfilters.Add(ctor);
                    }
                    else if (typeInfo.IsSubclassOf(typeof(CharFilter)))
                    {
                        assertTrue(ctor.ToString() + " has unsupported parameter types",
                                   allowedCharFilterArgs.containsAll(ctor.GetParameters().Select(p => p.ParameterType).ToArray()));
                        charfilters.Add(ctor);
                    }
                    else
                    {
                        fail("Cannot get here");
                    }
                }
            }

            IComparer <ConstructorInfo> ctorComp = Comparer <ConstructorInfo> .Create((arg0, arg1) => arg0.ToString().CompareToOrdinal(arg1.ToString()));

            tokenizers.Sort(ctorComp);
            tokenfilters.Sort(ctorComp);
            charfilters.Sort(ctorComp);
            if (Verbose)
            {
                Console.WriteLine("tokenizers = " + tokenizers);
                Console.WriteLine("tokenfilters = " + tokenfilters);
                Console.WriteLine("charfilters = " + charfilters);
            }
        }
Esempio n. 17
0
        public virtual void TestSurrogatesOrder()
        {
            Directory dir    = NewDirectory();
            var       config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            config.Codec = new PreFlexRWCodec();
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, config);

            int numField = TestUtil.NextInt32(Random, 2, 5);

            int uniqueTermCount = 0;

            int tc = 0;

            var fieldTerms = new List <Term>();

            for (int f = 0; f < numField; f++)
            {
                string field    = "f" + f;
                int    numTerms = AtLeast(200);

                ISet <string> uniqueTerms = new JCG.HashSet <string>();

                for (int i = 0; i < numTerms; i++)
                {
                    string term = GetRandomString(Random) + "_ " + (tc++);
                    uniqueTerms.Add(term);
                    fieldTerms.Add(new Term(field, term));
                    Documents.Document doc = new Documents.Document();
                    doc.Add(NewStringField(field, term, Field.Store.NO));
                    w.AddDocument(doc);
                }
                uniqueTermCount += uniqueTerms.Count;
            }

            IndexReader reader = w.GetReader();

            if (Verbose)
            {
                fieldTerms.Sort(termAsUTF16Comparer);

                Console.WriteLine("\nTEST: UTF16 order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            // sorts in code point order:
            fieldTerms.Sort();

            if (Verbose)
            {
                Console.WriteLine("\nTEST: codepoint order");
                foreach (Term t in fieldTerms)
                {
                    Console.WriteLine("  " + ToHexString(t));
                }
            }

            Term[] fieldTermsArray = fieldTerms.ToArray();

            //SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);

            //FieldsProducer fields = codec.fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, 1));
            //Assert.IsNotNull(fields);

            DoTestStraightEnum(fieldTerms, reader, uniqueTermCount);
            DoTestSeekExists(Random, fieldTerms, reader);
            DoTestSeekDoesNotExist(Random, numField, fieldTerms, fieldTermsArray, reader);

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Esempio n. 18
0
        private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter,
                                       int numberOfDocumentsToIndex)
        {
            for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("indexIter=" + indexIter);
                }
                Directory         dir = NewDirectory();
                RandomIndexWriter w   = new RandomIndexWriter(Random, dir,
                                                              NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                                                              .SetMergePolicy(NewLogMergePolicy()));
                bool scoreDocsInOrder         = TestJoinUtil.Random.NextBoolean();
                IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument,
                                                              scoreDocsInOrder);

                IndexReader topLevelReader = w.GetReader();
                w.Dispose();
                for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("searchIter=" + searchIter);
                    }
                    IndexSearcher indexSearcher = NewSearcher(topLevelReader);

                    int         r              = Random.Next(context.RandomUniqueValues.Length);
                    bool        from           = context.RandomFrom[r];
                    string      randomValue    = context.RandomUniqueValues[r];
                    FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader,
                                                                      context);

                    Query actualQuery = new TermQuery(new Term("value", randomValue));
                    if (VERBOSE)
                    {
                        Console.WriteLine("actualQuery=" + actualQuery);
                    }

                    var       scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
                    ScoreMode scoreMode       = (ScoreMode)Random.Next(scoreModeLength);
                    if (VERBOSE)
                    {
                        Console.WriteLine("scoreMode=" + scoreMode);
                    }

                    Query joinQuery;
                    if (from)
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    else
                    {
                        joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery,
                                                             indexSearcher, scoreMode);
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("joinQuery=" + joinQuery);
                    }

                    // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
                    FixedBitSet          actualResult         = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
                    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
                    indexSearcher.Search(joinQuery,
                                         new CollectorAnonymousInnerClassHelper2(this, scoreDocsInOrder, context, actualResult,
                                                                                 topScoreDocCollector));
                    // Asserting bit set...
                    if (VERBOSE)
                    {
                        Console.WriteLine("expected cardinality:" + expectedResult.Cardinality());
                        DocIdSetIterator iterator = expectedResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                        Console.WriteLine("actual cardinality:" + actualResult.Cardinality());
                        iterator = actualResult.GetIterator();
                        for (int doc = iterator.NextDoc();
                             doc != DocIdSetIterator.NO_MORE_DOCS;
                             doc = iterator.NextDoc())
                        {
                            Console.WriteLine(string.Format("Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                        }
                    }
                    assertEquals(expectedResult, actualResult);

                    // Asserting TopDocs...
                    TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
                    TopDocs actualTopDocs   = topScoreDocCollector.GetTopDocs();
                    assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
                    assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
                    if (scoreMode == ScoreMode.None)
                    {
                        continue;
                    }

                    assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
                    for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
                    {
                        if (VERBOSE)
                        {
                            string.Format("Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                            string.Format("Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score);
                        }
                        assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                        Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                        assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
                    }
                }
                topLevelReader.Dispose();
                dir.Dispose();
            }
        }
Esempio n. 19
0
        public virtual void Test()
        {
            // LUCENENET specific - disable the test if not 64 bit
            AssumeTrue("This test consumes too much RAM be run on x86.", Constants.RUNTIME_IS_64BIT);

            MockDirectoryWrapper dir = new MockDirectoryWrapper(Random, new MMapDirectory(CreateTempDir("4GBStoredFields")));

            dir.Throttling = Throttling.NEVER;

            var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                         .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                         .SetRAMBufferSizeMB(256.0)
                         .SetMergeScheduler(new ConcurrentMergeScheduler())
                         .SetMergePolicy(NewLogMergePolicy(false, 10))
                         .SetOpenMode(OpenMode.CREATE);
            IndexWriter w = new IndexWriter(dir, config);

            MergePolicy mp = w.Config.MergePolicy;

            if (mp is LogByteSizeMergePolicy)
            {
                // 1 petabyte:
                ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024;
            }

            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.IsIndexed = false;
            ft.IsStored  = true;
            ft.Freeze();
            int valueLength = RandomInts.RandomInt32Between(Random, 1 << 13, 1 << 20);
            var value       = new byte[valueLength];

            for (int i = 0; i < valueLength; ++i)
            {
                // random so that even compressing codecs can't compress it
                value[i] = (byte)Random.Next(256);
            }
            Field f = new Field("fld", value, ft);

            doc.Add(f);

            int numDocs = (int)((1L << 32) / valueLength + 100);

            for (int i = 0; i < numDocs; ++i)
            {
                w.AddDocument(doc);
                if (Verbose && i % (numDocs / 10) == 0)
                {
                    Console.WriteLine(i + " of " + numDocs + "...");
                }
            }
            w.ForceMerge(1);
            w.Dispose();
            if (Verbose)
            {
                bool found = false;
                foreach (string file in dir.ListAll())
                {
                    if (file.EndsWith(".fdt", StringComparison.Ordinal))
                    {
                        long fileLength = dir.FileLength(file);
                        if (fileLength >= 1L << 32)
                        {
                            found = true;
                        }
                        Console.WriteLine("File length of " + file + " : " + fileLength);
                    }
                }
                if (!found)
                {
                    Console.WriteLine("No .fdt file larger than 4GB, test bug?");
                }
            }

            DirectoryReader rd = DirectoryReader.Open(dir);
            Document        sd = rd.Document(numDocs - 1);

            Assert.IsNotNull(sd);
            Assert.AreEqual(1, sd.Fields.Count);
            BytesRef valueRef = sd.GetBinaryValue("fld");

            Assert.IsNotNull(valueRef);
            Assert.AreEqual(new BytesRef(value), valueRef);
            rd.Dispose();

            dir.Dispose();
        }
Esempio n. 20
0
        // randomly seeks to term that we know exists, then next's
        // from there
        private void DoTestSeekExists(Random r, IList <Term> fieldTerms, IndexReader reader)
        {
            IDictionary <string, TermsEnum> tes = new Dictionary <string, TermsEnum>();

            // Test random seek to existing term, then enum:
            if (Verbose)
            {
                Console.WriteLine("\nTEST: top now seek");
            }

            int num = AtLeast(100);

            for (int iter = 0; iter < num; iter++)
            {
                // pick random field+term
                int    spot  = r.Next(fieldTerms.Count);
                Term   term  = fieldTerms[spot];
                string field = term.Field;

                if (Verbose)
                {
                    Console.WriteLine("TEST: exist seek field=" + field + " term=" + UnicodeUtil.ToHexString(term.Text()));
                }

                // seek to it
                if (!tes.TryGetValue(field, out TermsEnum te))
                {
                    te         = MultiFields.GetTerms(reader, field).GetEnumerator();
                    tes[field] = te;
                }

                if (Verbose)
                {
                    Console.WriteLine("  done get enum");
                }

                // seek should find the term
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, te.SeekCeil(term.Bytes));

                // now .next() this many times:
                int ct = TestUtil.NextInt32(r, 5, 100);
                for (int i = 0; i < ct; i++)
                {
                    if (Verbose)
                    {
                        Console.WriteLine("TEST: now next()");
                    }
                    if (1 + spot + i >= fieldTerms.Count)
                    {
                        break;
                    }
                    term = fieldTerms[1 + spot + i];
                    if (!term.Field.Equals(field, StringComparison.Ordinal))
                    {
                        Assert.IsFalse(te.MoveNext());
                        break;
                    }
                    else
                    {
                        Assert.IsTrue(te.MoveNext());
                        BytesRef t = te.Term;

                        if (Verbose)
                        {
                            Console.WriteLine("  got term=" + (t == null ? null : UnicodeUtil.ToHexString(t.Utf8ToString())));
                            Console.WriteLine("       exp=" + UnicodeUtil.ToHexString(term.Text().ToString()));
                        }

                        Assert.AreEqual(term.Bytes, t);
                    }
                }
            }
        }
Esempio n. 21
0
        public async Task TestConcurrentAccess()
        {
            assertEquals(1, searchers.Count);
            using IndexReader r = DirectoryReader.Open(userindex);
            spellChecker.ClearIndex();
            assertEquals(2, searchers.Count);
            Addwords(r, spellChecker, "field1");
            assertEquals(3, searchers.Count);
            int num_field1 = this.NumDoc();

            Addwords(r, spellChecker, "field2");
            assertEquals(4, searchers.Count);
            int num_field2 = this.NumDoc();

            assertEquals(num_field2, num_field1 + 1);
            int numThreads = 5 + Random.nextInt(5);
            var tasks      = new ConcurrentBag <Task>();

            SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
            var executor = new LimitedConcurrencyLevelTaskScheduler(numThreads); // LUCENENET NOTE: Not sure why in Java they decided to pass the max concurrent threads as all of the threads, but this demonstrates how to use a custom TaskScheduler in .NET.

            using var shutdown = new CancellationTokenSource();
            var cancellationToken = shutdown.Token;
            var stop        = new AtomicBoolean(false);
            var taskFactory = new TaskFactory(executor);

            for (int i = 0; i < numThreads; i++)
            {
                SpellCheckWorker spellCheckWorker = new SpellCheckWorker(this, r, stop, cancellationToken, taskNum: i);
                workers[i] = spellCheckWorker;
                tasks.Add(taskFactory.StartNew(() => spellCheckWorker.Run(), cancellationToken));
            }

            int iterations = 5 + Random.nextInt(5);

            for (int i = 0; i < iterations; i++)
            {
                Thread.Sleep(100);
                // concurrently reset the spell index
                spellChecker.SetSpellIndex(this.spellindex);
                // for debug - prints the internal open searchers
                // showSearchersOpen();
            }

            stop.Value = true;
            executor.Shutdown(); // Stop allowing tasks to queue
            try
            {
                // wait for 60 seconds - usually this is very fast but coverage runs could take quite long
                shutdown.CancelAfter(TimeSpan.FromSeconds(60));
                await Task.WhenAll(tasks.ToArray());
            }
            catch (OperationCanceledException)
            {
                if (Verbose)
                {
                    Console.WriteLine($"\n{nameof(OperationCanceledException)} thrown\n(safe shutdown after timeout)");
                }
            }
            finally
            {
                shutdown.Dispose();
                spellChecker.Dispose(); // In Lucene, this was the line that did "stop" and the running task responded to the AlreadyClosedException to break out of the loop, but we are using AtomicBoolean to signal instead.
            }

            for (int i = 0; i < workers.Length; i++)
            {
                assertFalse(string.Format(CultureInfo.InvariantCulture, "worker thread {0} failed \n" + workers[i].Error, i), workers[i].Error != null);
                assertTrue(string.Format(CultureInfo.InvariantCulture, "worker thread {0} is still running but should be terminated", i), workers[i].terminated);
            }
            // 4 searchers more than iterations
            // 1. at creation
            // 2. clearIndex()
            // 2. and 3. during addwords
            assertEquals(iterations + 4, searchers.Count);
            AssertSearchersClosed();
        }