public override void BeforeClass()
{
    base.BeforeClass();
    ANALYZER = new MockAnalyzer(Random());
    qp = new StandardQueryParser(ANALYZER);

    HashMap<string, /*Number*/ object> randomNumberMap = new HashMap<string, object>();

    /*SimpleDateFormat*/ string dateFormat;
    long randomDate;
    bool dateFormatSanityCheckPass;
    int count = 0;
    do
    {
        if (count > 100)
        {
            fail("This test could not find a sane random DateFormat/NumberFormat. Stopped trying after 100 iterations.");
        }

        dateFormatSanityCheckPass = true;
        LOCALE = RandomLocale(Random());
        TIMEZONE = RandomTimeZone(Random());
        DATE_STYLE = randomDateStyle(Random());
        TIME_STYLE = randomDateStyle(Random());

        //// assumes localized date pattern will have at least year, month, day,
        //// hour, minute
        //dateFormat = (SimpleDateFormat)DateFormat.getDateTimeInstance(
        //    DATE_STYLE, TIME_STYLE, LOCALE);
        //// not all date patterns include era, full year, timezone and second,
        //// so we add them here
        //dateFormat.applyPattern(dateFormat.toPattern() + " G s Z yyyy");
        //dateFormat.setTimeZone(TIMEZONE);

        // assumes localized date pattern will have at least year, month, day,
        // hour, minute
        DATE_FORMAT = new NumberDateFormat(DATE_STYLE, TIME_STYLE, LOCALE)
        {
            TimeZone = TIMEZONE
        };

        // not all date patterns include era, full year, timezone and second,
        // so we add them here
        DATE_FORMAT.SetDateFormat(DATE_FORMAT.GetDateFormat() + " g s z yyyy");

        dateFormat = DATE_FORMAT.GetDateFormat();

        do
        {
            randomDate = Random().nextLong();
            // prune date value so it doesn't pass in insane values to some
            // calendars.
            randomDate = randomDate % 3400000000000L;
            // truncate to second
            randomDate = (randomDate / 1000L) * 1000L;
            // only positive values
            randomDate = Math.Abs(randomDate);
        } while (randomDate == 0L);

        dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, randomDate);
        dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, 0);
        dateFormatSanityCheckPass &= checkDateFormatSanity(dateFormat, -randomDate);

        count++;
    } while (!dateFormatSanityCheckPass);

    //NUMBER_FORMAT = NumberFormat.getNumberInstance(LOCALE);
    //NUMBER_FORMAT.setMaximumFractionDigits((Random().nextInt() & 20) + 1);
    //NUMBER_FORMAT.setMinimumFractionDigits((Random().nextInt() & 20) + 1);
    //NUMBER_FORMAT.setMaximumIntegerDigits((Random().nextInt() & 20) + 1);
    //NUMBER_FORMAT.setMinimumIntegerDigits((Random().nextInt() & 20) + 1);
    NUMBER_FORMAT = new NumberFormat(LOCALE);

    double randomDouble;
    long randomLong;
    int randomInt;
    float randomFloat;

    while ((randomLong = Convert.ToInt64(NormalizeNumber(Math.Abs(Random().nextLong())))) == 0L)
    {
        ;
    }
    while ((randomDouble = Convert.ToDouble(NormalizeNumber(Math.Abs(Random().NextDouble())))) == 0.0)
    {
        ;
    }
    while ((randomFloat = Convert.ToSingle(NormalizeNumber(Math.Abs(Random().nextFloat())))) == 0.0f)
    {
        ;
    }
    while ((randomInt = Convert.ToInt32(NormalizeNumber(Math.Abs(Random().nextInt())))) == 0)
    {
        ;
    }

    randomNumberMap.Put(NumericType.INT64.ToString(), randomLong);
    randomNumberMap.Put(NumericType.INT32.ToString(), randomInt);
    randomNumberMap.Put(NumericType.SINGLE.ToString(), randomFloat);
    randomNumberMap.Put(NumericType.DOUBLE.ToString(), randomDouble);
    randomNumberMap.Put(DATE_FIELD_NAME, randomDate);

    RANDOM_NUMBER_MAP = Collections.UnmodifiableMap(randomNumberMap);

    directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetMaxBufferedDocs(TestUtil.NextInt(Random(), 50, 1000))
            .SetMergePolicy(NewLogMergePolicy()));

    Document doc = new Document();
    HashMap<string, NumericConfig> numericConfigMap = new HashMap<string, NumericConfig>();
    HashMap<string, Field> numericFieldMap = new HashMap<string, Field>();
    qp.NumericConfigMap = numericConfigMap;

    foreach (NumericType type in Enum.GetValues(typeof(NumericType)))
    {
        if (type == NumericType.NONE)
        {
            continue;
        }

        numericConfigMap.Put(type.ToString(), new NumericConfig(PRECISION_STEP, NUMBER_FORMAT, type));

        FieldType ft2 = new FieldType(Int32Field.TYPE_NOT_STORED);
        ft2.NumericType = type;
        ft2.IsStored = true;
        ft2.NumericPrecisionStep = PRECISION_STEP;
        ft2.Freeze();
        Field field;

        switch (type)
        {
            case NumericType.INT32:
                field = new Int32Field(type.ToString(), 0, ft2);
                break;
            case NumericType.SINGLE:
                field = new SingleField(type.ToString(), 0.0f, ft2);
                break;
            case NumericType.INT64:
                field = new Int64Field(type.ToString(), 0L, ft2);
                break;
            case NumericType.DOUBLE:
                field = new DoubleField(type.ToString(), 0.0, ft2);
                break;
            default:
                fail();
                field = null;
                break;
        }

        numericFieldMap.Put(type.ToString(), field);
        doc.Add(field);
    }

    numericConfigMap.Put(DATE_FIELD_NAME, new NumericConfig(PRECISION_STEP, DATE_FORMAT, NumericType.INT64));
    FieldType ft = new FieldType(Int64Field.TYPE_NOT_STORED);
    ft.IsStored = true;
    ft.NumericPrecisionStep = PRECISION_STEP;
    Int64Field dateField = new Int64Field(DATE_FIELD_NAME, 0L, ft);
    numericFieldMap.Put(DATE_FIELD_NAME, dateField);
    doc.Add(dateField);

    foreach (NumberType numberType in Enum.GetValues(typeof(NumberType)))
    {
        setFieldValues(numberType, numericFieldMap);
        if (VERBOSE)
        {
            Console.WriteLine("Indexing document: " + doc);
        }
        writer.AddDocument(doc);
    }

    reader = writer.Reader;
    searcher = NewSearcher(reader);
    writer.Dispose();
}
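// Hedged usage sketch (not part of the original suite): once NumericConfigMap is installed as
// above, StandardQueryParser rewrites range syntax on a configured field into a numeric range
// query rather than a term range. The field name below is the key registered in numericConfigMap
// (NumericType.INT64.ToString()); the query bounds and default field are illustrative assumptions.
private Query ParseNumericRangeSketch()
{
    // qp resolves the field through its NumericConfig (PRECISION_STEP + NUMBER_FORMAT set in BeforeClass)
    return qp.Parse(NumericType.INT64.ToString() + ":[10 TO 20]", "defaultField");
}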
public virtual TokenInfoDictionaryWriter BuildDictionary(IList<string> csvFiles)
{
    TokenInfoDictionaryWriter dictionary = new TokenInfoDictionaryWriter(10 * 1024 * 1024);

    // all lines in the file
    Console.WriteLine("  parse...");
    List<string[]> lines = new List<string[]>(400000);
    foreach (string file in csvFiles)
    {
        using (Stream inputStream = new FileStream(file, FileMode.Open, FileAccess.Read))
        {
            Encoding decoder = Encoding.GetEncoding(encoding);
            using (TextReader reader = new StreamReader(inputStream, decoder)) // dispose the reader as well
            {
                string line = null;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] entry = CSVUtil.Parse(line);

                    if (entry.Length < 13)
                    {
                        Console.WriteLine("Entry in CSV is not valid: " + line);
                        continue;
                    }

                    string[] formatted = FormatEntry(entry);
                    lines.Add(formatted);

                    // NFKC normalize dictionary entry
                    if (normalizeEntries)
                    {
                        //if (normalizer.isNormalized(entry[0])){
                        if (entry[0].IsNormalized(NormalizationForm.FormKC))
                        {
                            continue;
                        }
                        string[] normalizedEntry = new string[entry.Length];
                        for (int i = 0; i < entry.Length; i++)
                        {
                            //normalizedEntry[i] = normalizer.normalize(entry[i]);
                            normalizedEntry[i] = entry[i].Normalize(NormalizationForm.FormKC);
                        }

                        formatted = FormatEntry(normalizedEntry);
                        lines.Add(formatted);
                    }
                }
            }
        }
    }

    Console.WriteLine("  sort...");

    // sort by term: we sorted the files already and use a stable sort.
    lines.Sort(new ComparerAnonymousHelper());

    Console.WriteLine("  encode...");

    PositiveInt32Outputs fstOutput = PositiveInt32Outputs.Singleton;
    Builder<long?> fstBuilder = new Builder<long?>(Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE2, 0, 0, true, true,
        int.MaxValue, fstOutput, null, true, PackedInt32s.DEFAULT, true, 15);
    Int32sRef scratch = new Int32sRef();
    long ord = -1; // first ord will be 0
    string lastValue = null;

    // build tokeninfo dictionary
    foreach (string[] entry in lines)
    {
        int next = dictionary.Put(entry);

        if (next == offset)
        {
            Console.WriteLine("Failed to process line: " + Collections.ToString(entry));
            continue;
        }

        string token = entry[0];
        if (!token.Equals(lastValue, StringComparison.Ordinal))
        {
            // new word to add to fst
            ord++;
            lastValue = token;
            scratch.Grow(token.Length);
            scratch.Length = token.Length;
            for (int i = 0; i < token.Length; i++)
            {
                scratch.Int32s[i] = (int)token[i];
            }
            fstBuilder.Add(scratch, ord);
        }
        dictionary.AddMapping((int)ord, offset);
        offset = next;
    }

    FST<long?> fst = fstBuilder.Finish();

    Console.WriteLine("  " + fst.NodeCount + " nodes, " + fst.ArcCount + " arcs, " + fst.GetSizeInBytes() + " bytes...");
    dictionary.SetFST(fst);
    Console.WriteLine(" done");

    return dictionary;
}
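// Illustrative sketch (assumptions noted): the encode loop above maps each distinct surface form
// to a monotonically increasing ordinal in an FST. The same pattern in isolation, using the
// simplest Builder overload rather than the fully tuned one above; the helper name is ours, and
// the input must already be sorted and deduplicated.
private static FST<long?> BuildOrdFstSketch(IEnumerable<string> sortedUniqueTokens)
{
    PositiveInt32Outputs outputs = PositiveInt32Outputs.Singleton;
    Builder<long?> builder = new Builder<long?>(Lucene.Net.Util.Fst.FST.INPUT_TYPE.BYTE2, outputs);
    Int32sRef scratch = new Int32sRef();
    long ord = 0;
    foreach (string token in sortedUniqueTokens)
    {
        scratch.Grow(token.Length);
        scratch.Length = token.Length;
        for (int i = 0; i < token.Length; i++)
        {
            scratch.Int32s[i] = token[i]; // one UTF-16 code unit per arc (BYTE2 input type)
        }
        builder.Add(scratch, ord++);
    }
    return builder.Finish();
}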
public virtual void TestIndexing()
{
    DirectoryInfo tmpDir = CreateTempDir("TestNeverDelete");
    BaseDirectoryWrapper d = NewFSDirectory(tmpDir);

    // We want to "see" files removed if Lucene removes them. This is
    // still worth running on Windows since the IR opens and closes
    // some files.
    if (d is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)d).NoDeleteOpenFile = false;
    }
    RandomIndexWriter w = new RandomIndexWriter(Random(), d,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetIndexDeletionPolicy(NoDeletionPolicy.INSTANCE));
    w.w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 5, 30));

    w.Commit();
    ThreadClass[] indexThreads = new ThreadClass[Random().Next(4)];
    long stopTime = Environment.TickCount + AtLeast(1000);
    for (int x = 0; x < indexThreads.Length; x++)
    {
        indexThreads[x] = new ThreadAnonymousInnerClassHelper(w, stopTime, NewStringField, NewTextField);
        indexThreads[x].Name = "Thread " + x;
        indexThreads[x].Start();
    }

    HashSet<string> allFiles = new HashSet<string>();

    DirectoryReader r = DirectoryReader.Open(d);
    while (Environment.TickCount < stopTime)
    {
        IndexCommit ic = r.IndexCommit;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: check files: " + ic.FileNames);
        }
        allFiles.AddAll(ic.FileNames);
        // Make sure no old files were removed
        foreach (string fileName in allFiles)
        {
            Assert.IsTrue(SlowFileExists(d, fileName), "file " + fileName + " does not exist");
        }
        DirectoryReader r2 = DirectoryReader.OpenIfChanged(r);
        if (r2 != null)
        {
            r.Dispose();
            r = r2;
        }
        Thread.Sleep(1);
    }
    r.Dispose();

    foreach (ThreadClass t in indexThreads)
    {
        t.Join();
    }
    w.Dispose();
    d.Dispose();

    System.IO.Directory.Delete(tmpDir.FullName, true);
}
internal virtual void AssertQuery(Query query, Filter filter, Sort sort) { int maxDoc = Searcher.IndexReader.MaxDoc; TopDocs all; int pageSize = TestUtil.NextInt32(Random, 1, maxDoc * 2); if (isVerbose) { Console.WriteLine("\nassertQuery " + (Iter++) + ": query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize); } bool doMaxScore = Random.NextBoolean(); bool doScores = Random.NextBoolean(); if (sort == null) { all = Searcher.Search(query, filter, maxDoc); } else if (sort == Sort.RELEVANCE) { all = Searcher.Search(query, filter, maxDoc, sort, true, doMaxScore); } else { all = Searcher.Search(query, filter, maxDoc, sort, doScores, doMaxScore); } if (isVerbose) { Console.WriteLine(" all.TotalHits=" + all.TotalHits); int upto = 0; foreach (ScoreDoc scoreDoc in all.ScoreDocs) { Console.WriteLine(" hit " + (upto++) + ": id=" + Searcher.Doc(scoreDoc.Doc).Get("id") + " " + scoreDoc); } } int pageStart = 0; ScoreDoc lastBottom = null; while (pageStart < all.TotalHits) { TopDocs paged; if (sort == null) { if (isVerbose) { Console.WriteLine(" iter lastBottom=" + lastBottom); } paged = Searcher.SearchAfter(lastBottom, query, filter, pageSize); } else { if (isVerbose) { Console.WriteLine(" iter lastBottom=" + lastBottom); } if (sort == Sort.RELEVANCE) { paged = Searcher.SearchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore); } else { paged = Searcher.SearchAfter(lastBottom, query, filter, pageSize, sort, doScores, doMaxScore); } } if (isVerbose) { Console.WriteLine(" " + paged.ScoreDocs.Length + " hits on page"); } if (paged.ScoreDocs.Length == 0) { break; } AssertPage(pageStart, all, paged); pageStart += paged.ScoreDocs.Length; lastBottom = paged.ScoreDocs[paged.ScoreDocs.Length - 1]; } Assert.AreEqual(all.ScoreDocs.Length, pageStart); }
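// Minimal sketch of the deep-paging pattern AssertQuery exercises (helper name is ours):
// SearchAfter takes the last ScoreDoc of the previous page as a cursor, so each page is
// collected without re-scoring the earlier ones.
private static void PageThroughSketch(IndexSearcher searcher, Query query, int pageSize)
{
    ScoreDoc after = null;
    while (true)
    {
        TopDocs page = searcher.SearchAfter(after, query, pageSize);
        if (page.ScoreDocs.Length == 0)
        {
            break; // ran off the end of the result set
        }
        after = page.ScoreDocs[page.ScoreDocs.Length - 1]; // cursor for the next page
    }
}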
public void TestLazy() { int id = Random.nextInt(NUM_DOCS); IndexReader reader = DirectoryReader.Open(dir); try { Query q = new TermQuery(new Term("docid", "" + id)); IndexSearcher searcher = NewSearcher(reader); ScoreDoc[] hits = searcher.Search(q, 100).ScoreDocs; assertEquals("Too many docs", 1, hits.Length); LazyTestingStoredFieldVisitor visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hits[0].Doc), FIELDS); reader.Document(hits[0].Doc, visitor); Document d = visitor.doc; int numFieldValues = 0; IDictionary <string, int> fieldValueCounts = new JCG.Dictionary <string, int>(); // at this point, all FIELDS should be Lazy and unrealized foreach (IIndexableField f in d) { numFieldValues++; if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("load_later", StringComparison.Ordinal)) { fail("load_later was loaded on first pass"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { if (!fieldValueCounts.TryGetValue(f.Name, out int count)) { count = 0; } count++; fieldValueCounts.Put(f.Name, count); assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertFalse(f.Name + " is loaded", lf.HasBeenLoaded); } } Console.WriteLine("numFieldValues == " + numFieldValues); assertEquals("numFieldValues", 1 + (NUM_VALUES * FIELDS.Length), numFieldValues); foreach (string field in fieldValueCounts.Keys) { assertEquals("fieldName count: " + field, NUM_VALUES, fieldValueCounts[field]); } // pick a single field name to load a single value string fieldName = FIELDS[Random.nextInt(FIELDS.Length)]; IIndexableField[] fieldValues = d.GetFields(fieldName); assertEquals("#vals in field: " + fieldName, NUM_VALUES, fieldValues.Length); int valNum = Random.nextInt(fieldValues.Length); assertEquals(id + "_" + fieldName + "_" + valNum, fieldValues[valNum].GetStringValue()); // now every value of fieldName should be loaded foreach (IIndexableField f in d) { if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("load_later", StringComparison.Ordinal)) { fail("load_later was loaded too soon"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertEquals(f.Name + " is loaded?", lf.Name.Equals(fieldName, StringComparison.Ordinal), lf.HasBeenLoaded); } } // use the same LazyDoc to ask for one more lazy field visitor = new LazyTestingStoredFieldVisitor(new LazyDocument(reader, hits[0].Doc), "load_later"); reader.Document(hits[0].Doc, visitor); d = visitor.doc; // ensure we have all the values we expect now, and that // adding one more lazy field didn't "unload" the existing LazyField's // we already loaded. 
foreach (IIndexableField f in d) { if (f.Name.Equals("never_load", StringComparison.Ordinal)) { fail("never_load was loaded"); } if (f.Name.Equals("docid", StringComparison.Ordinal)) { assertFalse(f.Name, f is LazyDocument.LazyField); } else { assertTrue(f.Name + " is " + f.GetType(), f is LazyDocument.LazyField); LazyDocument.LazyField lf = (LazyDocument.LazyField)f; assertEquals(f.Name + " is loaded?", lf.Name.Equals(fieldName, StringComparison.Ordinal), lf.HasBeenLoaded); } } // even the underlying doc shouldn't have never_load assertNull("never_load was loaded in wrapped doc", visitor.lazyDoc.GetDocument().GetField("never_load")); } finally { reader.Dispose(); } }
public virtual void TestUnsupportedOldIndexes()
{
    for (int i = 0; i < UnsupportedNames.Length; i++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: index " + UnsupportedNames[i]);
        }
        DirectoryInfo oldIndexDir = CreateTempDir(UnsupportedNames[i]);
        using (Stream dataFile = GetDataFile("unsupported." + UnsupportedNames[i] + ".zip"))
        {
            TestUtil.Unzip(dataFile, oldIndexDir);
        }
        BaseDirectoryWrapper dir = NewFSDirectory(oldIndexDir);
        // don't check the index on close; these are intentionally not supported
        dir.CheckIndexOnClose = false;

        IndexReader reader = null;
        IndexWriter writer = null;
        try
        {
            reader = DirectoryReader.Open(dir);
            Assert.Fail("DirectoryReader.open should not pass for " + UnsupportedNames[i]);
        }
#pragma warning disable 168
        catch (IndexFormatTooOldException e)
#pragma warning restore 168
        {
            // pass
        }
        finally
        {
            if (reader != null)
            {
                reader.Dispose();
            }
            reader = null;
        }

        try
        {
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Assert.Fail("IndexWriter creation should not pass for " + UnsupportedNames[i]);
        }
        catch (IndexFormatTooOldException e)
        {
            // pass
            if (VERBOSE)
            {
                Console.WriteLine("TEST: got expected exc:");
                Console.WriteLine(e.StackTrace);
            }
            // Make sure exc message includes a path=
            Assert.IsTrue(e.Message.IndexOf("path=\"") != -1, "got exc message: " + e.Message);
        }
        finally
        {
            // we should fail to open IW, and so it should be null when we get here.
            // However, if the test fails (i.e., IW did not fail on open), we need
            // to close IW. However, if merges are run, IW may throw
            // IndexFormatTooOldException, and we don't want to mask the Assert.Fail()
            // above, so close without waiting for merges.
            if (writer != null)
            {
                writer.Dispose(false);
            }
            writer = null;
        }

        StringBuilder sb = new StringBuilder(1024);
        CheckIndex checker = new CheckIndex(dir);
        CheckIndex.Status indexStatus;
        using (var infoStream = new StringWriter(sb))
        {
            checker.InfoStream = infoStream;
            indexStatus = checker.DoCheckIndex();
        }
        Assert.IsFalse(indexStatus.Clean);
        Assert.IsTrue(sb.ToString().Contains(typeof(IndexFormatTooOldException).Name));

        dir.Dispose();
        TestUtil.Rm(oldIndexDir);
    }
}
private void TestRandomTrieAndClassicRangeQuery(int precisionStep) { string field = "field" + precisionStep; int totalTermCountT = 0, totalTermCountC = 0, termCountT, termCountC; int num = TestUtil.NextInt(Random(), 10, 20); for (int i = 0; i < num; i++) { long lower = (long)(Random().NextDouble() * NoDocs * Distance) + StartOffset; long upper = (long)(Random().NextDouble() * NoDocs * Distance) + StartOffset; if (lower > upper) { long a = lower; lower = upper; upper = a; } BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT64), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT64); NumericUtils.Int64ToPrefixCodedBytes(lower, 0, lowerBytes); NumericUtils.Int64ToPrefixCodedBytes(upper, 0, upperBytes); // test inclusive range NumericRangeQuery <long> tq = NumericRangeQuery.NewInt64Range(field, precisionStep, lower, upper, true, true); TermRangeQuery cq = new TermRangeQuery(field, lowerBytes, upperBytes, true, true); TopDocs tTopDocs = Searcher.Search(tq, 1); TopDocs cTopDocs = Searcher.Search(cq, 1); Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal"); totalTermCountT += termCountT = CountTerms(tq); totalTermCountC += termCountC = CountTerms(cq); CheckTermCounts(precisionStep, termCountT, termCountC); // test exclusive range tq = NumericRangeQuery.NewInt64Range(field, precisionStep, lower, upper, false, false); cq = new TermRangeQuery(field, lowerBytes, upperBytes, false, false); tTopDocs = Searcher.Search(tq, 1); cTopDocs = Searcher.Search(cq, 1); Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal"); totalTermCountT += termCountT = CountTerms(tq); totalTermCountC += termCountC = CountTerms(cq); CheckTermCounts(precisionStep, termCountT, termCountC); // test left exclusive range tq = NumericRangeQuery.NewInt64Range(field, precisionStep, lower, upper, false, true); cq = new TermRangeQuery(field, lowerBytes, upperBytes, false, true); tTopDocs = Searcher.Search(tq, 1); cTopDocs = Searcher.Search(cq, 1); Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal"); totalTermCountT += termCountT = CountTerms(tq); totalTermCountC += termCountC = CountTerms(cq); CheckTermCounts(precisionStep, termCountT, termCountC); // test right exclusive range tq = NumericRangeQuery.NewInt64Range(field, precisionStep, lower, upper, true, false); cq = new TermRangeQuery(field, lowerBytes, upperBytes, true, false); tTopDocs = Searcher.Search(tq, 1); cTopDocs = Searcher.Search(cq, 1); Assert.AreEqual(cTopDocs.TotalHits, tTopDocs.TotalHits, "Returned count for NumericRangeQuery and TermRangeQuery must be equal"); totalTermCountT += termCountT = CountTerms(tq); totalTermCountC += termCountC = CountTerms(cq); CheckTermCounts(precisionStep, termCountT, termCountC); } CheckTermCounts(precisionStep, totalTermCountT, totalTermCountC); if (VERBOSE && precisionStep != int.MaxValue) { Console.WriteLine("Average number of terms during random search on '" + field + "':"); Console.WriteLine(" Numeric query: " + (((double)totalTermCountT) / (num * 4))); Console.WriteLine(" Classical query: " + (((double)totalTermCountC) / (num * 4))); } }
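// Side note in code form (a sketch using the same NumericUtils API as above): at shift 0 a
// numeric value's prefix-coded form is directly comparable as a term, which is why a
// TermRangeQuery over the encoded bounds must return the same hit counts as the
// NumericRangeQuery asserted above.
private static BytesRef EncodeInt64Sketch(long value)
{
    BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_INT64);
    NumericUtils.Int64ToPrefixCodedBytes(value, 0, bytes); // shift 0 = full precision
    return bytes;
}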
public override CollectionStatistics CollectionStatistics(string field) { // TODO: we could compute this on init and cache, // since we are re-inited whenever any nodes have a // new reader long docCount = 0; long sumTotalTermFreq = 0; long sumDocFreq = 0; long maxDoc = 0; for (int nodeID = 0; nodeID < nodeVersions.Length; nodeID++) { FieldAndShardVersion key = new FieldAndShardVersion(nodeID, nodeVersions[nodeID], field); CollectionStatistics nodeStats; if (nodeID == MyNodeID) { nodeStats = base.CollectionStatistics(field); } else { nodeStats = outerInstance.collectionStatsCache[key]; } if (nodeStats == null) { Console.WriteLine("coll stats myNodeID=" + MyNodeID + ": " + outerInstance.collectionStatsCache.Keys); } // Collection stats are pre-shared on reopen, so, // we better not have a cache miss: Debug.Assert(nodeStats != null, "myNodeID=" + MyNodeID + " nodeID=" + nodeID + " version=" + nodeVersions[nodeID] + " field=" + field); long nodeDocCount = nodeStats.DocCount; if (docCount >= 0 && nodeDocCount >= 0) { docCount += nodeDocCount; } else { docCount = -1; } long nodeSumTotalTermFreq = nodeStats.SumTotalTermFreq; if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0) { sumTotalTermFreq += nodeSumTotalTermFreq; } else { sumTotalTermFreq = -1; } long nodeSumDocFreq = nodeStats.SumDocFreq; if (sumDocFreq >= 0 && nodeSumDocFreq >= 0) { sumDocFreq += nodeSumDocFreq; } else { sumDocFreq = -1; } Debug.Assert(nodeStats.MaxDoc >= 0); maxDoc += nodeStats.MaxDoc; } return(new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq)); }
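// The -1 handling above implements a simple "unknown poisons the sum" rule: Lucene reports a
// statistic it does not track as -1, and a single unknown node makes the cluster-wide aggregate
// unknown. A sketch of that rule in isolation (helper name is ours, not Lucene's):
private static long AddStatOrInvalidate(long total, long nodeValue)
{
    // keep summing only while both sides are known (>= 0); otherwise collapse to -1
    return (total >= 0 && nodeValue >= 0) ? total + nodeValue : -1;
}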
internal virtual FST <T> DoTest(int prune1, int prune2, bool allowRandomSuffixSharing) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("\nTEST: prune1=" + prune1 + " prune2=" + prune2); } bool willRewrite = random.NextBoolean(); Builder <T> builder = new Builder <T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, prune1, prune2, prune1 == 0 && prune2 == 0, allowRandomSuffixSharing ? random.NextBoolean() : true, allowRandomSuffixSharing ? TestUtil.NextInt32(random, 1, 10) : int.MaxValue, outputs, null, willRewrite, PackedInt32s.DEFAULT, true, 15); if (LuceneTestCase.VERBOSE) { if (willRewrite) { Console.WriteLine("TEST: packed FST"); } else { Console.WriteLine("TEST: non-packed FST"); } } foreach (InputOutput <T> pair in pairs) { if (pair.Output is IEnumerable) { Builder <object> builderObject = builder as Builder <object>; var values = pair.Output as IEnumerable; foreach (object value in values) { builderObject.Add(pair.Input, value); } } else { builder.Add(pair.Input, pair.Output); } } FST <T> fst = builder.Finish(); if (random.NextBoolean() && fst != null && !willRewrite) { IOContext context = LuceneTestCase.NewIOContext(random); using (IndexOutput @out = dir.CreateOutput("fst.bin", context)) { fst.Save(@out); } IndexInput @in = dir.OpenInput("fst.bin", context); try { fst = new FST <T>(@in, outputs); } finally { @in.Dispose(); dir.DeleteFile("fst.bin"); } } if (LuceneTestCase.VERBOSE && pairs.Count <= 20 && fst != null) { using (TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8)) { Util.ToDot(fst, w, false, false); } Console.WriteLine("SAVED out.dot"); } if (LuceneTestCase.VERBOSE) { if (fst == null) { Console.WriteLine(" fst has 0 nodes (fully pruned)"); } else { Console.WriteLine(" fst has " + fst.NodeCount + " nodes and " + fst.ArcCount + " arcs"); } } if (prune1 == 0 && prune2 == 0) { VerifyUnPruned(inputMode, fst); } else { VerifyPruned(inputMode, fst, prune1, prune2); } return(fst); }
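// Round-trip sketch mirroring the save/load branch above (file name and IOContext are
// illustrative): an FST is persisted to an IndexOutput and reconstructed from an IndexInput
// together with a matching Outputs instance.
private static FST<long?> RoundTripSketch(Directory dir, FST<long?> fst, PositiveInt32Outputs outputs)
{
    IOContext context = IOContext.DEFAULT;
    using (IndexOutput output = dir.CreateOutput("fst.sketch", context))
    {
        fst.Save(output); // serialize
    }
    using (IndexInput input = dir.OpenInput("fst.sketch", context))
    {
        return new FST<long?>(input, outputs); // deserialize; the FST is read fully into memory
    }
}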
public virtual void TestRangeQueryId() { // NOTE: uses index build in *super* setUp IndexReader reader = SignedIndexReader; IndexSearcher search = NewSearcher(reader); if (VERBOSE) { Console.WriteLine("TEST: reader=" + reader); } int medId = ((MaxId - MinId) / 2); string minIP = Pad(MinId); string maxIP = Pad(MaxId); string medIP = Pad(medId); int numDocs = reader.NumDocs; AssertEquals("num of docs", numDocs, 1 + MaxId - MinId); ScoreDoc[] result; // test id, bounded on both ends result = search.Search(Csrq("id", minIP, maxIP, T, T), null, numDocs).ScoreDocs; AssertEquals("find all", numDocs, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("find all", numDocs, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, F), null, numDocs).ScoreDocs; AssertEquals("all but last", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but last", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, T), null, numDocs).ScoreDocs; AssertEquals("all but first", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but first", numDocs - 1, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, F), null, numDocs).ScoreDocs; AssertEquals("all but ends", numDocs - 2, result.Length); result = search.Search(Csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("all but ends", numDocs - 2, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, T), null, numDocs).ScoreDocs; AssertEquals("med and up", 1 + MaxId - medId, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("med and up", 1 + MaxId - medId, result.Length); result = search.Search(Csrq("id", minIP, medIP, T, T), null, numDocs).ScoreDocs; AssertEquals("up to med", 1 + medId - MinId, result.Length); result = search.Search(Csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs; AssertEquals("up to med", 1 + medId - MinId, result.Length); // unbounded id result = search.Search(Csrq("id", minIP, null, T, F), null, numDocs).ScoreDocs; AssertEquals("min and up", numDocs, result.Length); result = search.Search(Csrq("id", null, maxIP, F, T), null, numDocs).ScoreDocs; AssertEquals("max and down", numDocs, result.Length); result = search.Search(Csrq("id", minIP, null, F, F), null, numDocs).ScoreDocs; AssertEquals("not min, but up", numDocs - 1, result.Length); result = search.Search(Csrq("id", null, maxIP, F, F), null, numDocs).ScoreDocs; AssertEquals("not max, but down", numDocs - 1, result.Length); result = search.Search(Csrq("id", medIP, maxIP, T, F), null, numDocs).ScoreDocs; AssertEquals("med and up, not max", MaxId - medId, result.Length); result = search.Search(Csrq("id", minIP, medIP, F, T), null, numDocs).ScoreDocs; AssertEquals("not min, up to med", medId - MinId, result.Length); // very small sets result = search.Search(Csrq("id", minIP, minIP, F, F), null, numDocs).ScoreDocs; AssertEquals("min,min,F,F", 0, result.Length); result = search.Search(Csrq("id", minIP, minIP, F, F, 
MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("min,min,F,F", 0, result.Length);

result = search.Search(Csrq("id", medIP, medIP, F, F), null, numDocs).ScoreDocs;
AssertEquals("med,med,F,F", 0, result.Length);
result = search.Search(Csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("med,med,F,F", 0, result.Length);

result = search.Search(Csrq("id", maxIP, maxIP, F, F), null, numDocs).ScoreDocs;
AssertEquals("max,max,F,F", 0, result.Length);
result = search.Search(Csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("max,max,F,F", 0, result.Length);

result = search.Search(Csrq("id", minIP, minIP, T, T), null, numDocs).ScoreDocs;
AssertEquals("min,min,T,T", 1, result.Length);
result = search.Search(Csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("min,min,T,T", 1, result.Length);

result = search.Search(Csrq("id", null, minIP, F, T), null, numDocs).ScoreDocs;
AssertEquals("nul,min,F,T", 1, result.Length);
result = search.Search(Csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("nul,min,F,T", 1, result.Length);

result = search.Search(Csrq("id", maxIP, maxIP, T, T), null, numDocs).ScoreDocs;
AssertEquals("max,max,T,T", 1, result.Length);
result = search.Search(Csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("max,max,T,T", 1, result.Length);

result = search.Search(Csrq("id", maxIP, null, T, F), null, numDocs).ScoreDocs;
AssertEquals("max,nul,T,F", 1, result.Length);
result = search.Search(Csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("max,nul,T,F", 1, result.Length);

result = search.Search(Csrq("id", medIP, medIP, T, T), null, numDocs).ScoreDocs;
AssertEquals("med,med,T,T", 1, result.Length);
result = search.Search(Csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).ScoreDocs;
AssertEquals("med,med,T,T", 1, result.Length);
}
public virtual void Test() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir); long startTime = Environment.TickCount; // TODO: replace w/ the @nightly test data; make this // into an optional @nightly stress test Document doc = new Document(); Field body = NewTextField("body", "", Field.Store.NO); doc.Add(body); StringBuilder sb = new StringBuilder(); for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int numTerms = Random.Next(10); for (int termCount = 0; termCount < numTerms; termCount++) { sb.Append(Random.NextBoolean() ? "aaa" : "bbb"); sb.Append(' '); } body.SetStringValue(sb.ToString()); w.AddDocument(doc); sb.Remove(0, sb.Length); } IndexReader r = w.GetReader(); w.Dispose(); long endTime = Environment.TickCount; if (VERBOSE) { Console.WriteLine("BUILD took " + (endTime - startTime)); } IndexSearcher s = NewSearcher(r); AtomicBoolean failed = new AtomicBoolean(); AtomicInt64 netSearch = new AtomicInt64(); ThreadClass[] threads = new ThreadClass[NUM_SEARCH_THREADS]; for (int threadID = 0; threadID < NUM_SEARCH_THREADS; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, s, failed, netSearch); threads[threadID].SetDaemon(true); } foreach (ThreadClass t in threads) { t.Start(); } foreach (ThreadClass t in threads) { t.Join(); } if (VERBOSE) { Console.WriteLine(NUM_SEARCH_THREADS + " threads did " + netSearch.Get() + " searches"); } r.Dispose(); dir.Dispose(); }
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 2; pass++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: cycle pass=" + pass);
        }
        bool useCompoundFile = (pass % 2) != 0;

        Directory dir = NewDirectory();

        IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetIndexDeletionPolicy(new KeepAllDeletionPolicy(this, dir))
            .SetMaxBufferedDocs(10)
            .SetMergeScheduler(new SerialMergeScheduler());
        MergePolicy mp = conf.MergePolicy;
        mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
        IndexWriter writer = new IndexWriter(dir, conf);
        KeepAllDeletionPolicy policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
        }
        writer.Dispose();

        bool needsMerging;
        {
            DirectoryReader r = DirectoryReader.Open(dir);
            needsMerging = r.Leaves.Count != 1;
            r.Dispose();
        }
        if (needsMerging)
        {
            conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                .SetOpenMode(OpenMode.APPEND)
                .SetIndexDeletionPolicy(policy);
            mp = conf.MergePolicy;
            mp.NoCFSRatio = useCompoundFile ? 1.0 : 0.0;
            if (VERBOSE)
            {
                Console.WriteLine("TEST: open writer for forceMerge");
            }
            writer = new IndexWriter(dir, conf);
            policy = (KeepAllDeletionPolicy)writer.Config.IndexDeletionPolicy;
            writer.ForceMerge(1);
            writer.Dispose();
        }

        Assert.AreEqual(needsMerging ? 2 : 1, policy.NumOnInit);

        // If we are not auto committing then there should
        // be exactly 2 commits (one per close above):
        Assert.AreEqual(1 + (needsMerging ? 1 : 0), policy.NumOnCommit);

        // Test listCommits
        ICollection<IndexCommit> commits = DirectoryReader.ListCommits(dir);
        // 2 from closing writer
        Assert.AreEqual(1 + (needsMerging ? 1 : 0), commits.Count);

        // Make sure we can open a reader on each commit:
        foreach (IndexCommit commit in commits)
        {
            IndexReader r = DirectoryReader.Open(commit);
            r.Dispose();
        }

        // Simplistic check: just verify all segments_N's still
        // exist, and, I can open a reader on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetLastCommitGeneration(dir);
        while (gen > 0)
        {
            IndexReader reader = DirectoryReader.Open(dir);
            reader.Dispose();
            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            gen--;

            if (gen > 0)
            {
                // Now that we've removed a commit point, at least one index
                // file should have been orphaned. Open & close a writer and
                // assert that it actually removed something:
                int preCount = dir.ListAll().Length;
                writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                    .SetOpenMode(OpenMode.APPEND)
                    .SetIndexDeletionPolicy(policy));
                writer.Dispose();
                int postCount = dir.ListAll().Length;
                Assert.IsTrue(postCount < preCount);
            }
        }

        dir.Dispose();
    }
}
private IndexContext CreateIndexContext(bool multipleFacetValuesPerDocument) { Random random = Random; int numDocs = TestUtil.NextInt32(random, 138, 1145) * RANDOM_MULTIPLIER; int numGroups = TestUtil.NextInt32(random, 1, numDocs / 4); int numFacets = TestUtil.NextInt32(random, 1, numDocs / 6); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs + " numGroups=" + numGroups); } List <string> groups = new List <string>(); for (int i = 0; i < numGroups; i++) { groups.Add(GenerateRandomNonEmptyString()); } List <string> facetValues = new List <string>(); for (int i = 0; i < numFacets; i++) { facetValues.Add(GenerateRandomNonEmptyString()); } string[] contentBrs = new string[TestUtil.NextInt32(random, 2, 20)]; if (VERBOSE) { Console.WriteLine("TEST: create fake content"); } for (int contentIDX = 0; contentIDX < contentBrs.Length; contentIDX++) { contentBrs[contentIDX] = GenerateRandomNonEmptyString(); if (VERBOSE) { Console.WriteLine(" content=" + contentBrs[contentIDX]); } } Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter( random, dir, NewIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random) ) ); bool canUseDV = !"Lucene3x".Equals(writer.IndexWriter.Config.Codec.Name, StringComparison.Ordinal); bool useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean(); Document doc = new Document(); Document docNoGroup = new Document(); Document docNoFacet = new Document(); Document docNoGroupNoFacet = new Document(); Field group = NewStringField("group", "", Field.Store.NO); Field groupDc = new SortedDocValuesField("group_dv", new BytesRef()); if (useDv) { doc.Add(groupDc); docNoFacet.Add(groupDc); } doc.Add(group); docNoFacet.Add(group); Field[] facetFields; if (useDv) { Debug.Assert(!multipleFacetValuesPerDocument); facetFields = new Field[2]; facetFields[0] = NewStringField("facet", "", Field.Store.NO); doc.Add(facetFields[0]); docNoGroup.Add(facetFields[0]); facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef()); doc.Add(facetFields[1]); docNoGroup.Add(facetFields[1]); } else { facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1]; for (int i = 0; i < facetFields.Length; i++) { facetFields[i] = NewStringField("facet", "", Field.Store.NO); doc.Add(facetFields[i]); docNoGroup.Add(facetFields[i]); } } Field content = NewStringField("content", "", Field.Store.NO); doc.Add(content); docNoGroup.Add(content); docNoFacet.Add(content); docNoGroupNoFacet.Add(content); // LUCENENET NOTE: TreeSet (the class used in Java) allows duplicate keys. However, SortedSet seems to work, // and based on the name of the variable, presuming the entries are meant to be unique. 
ISet <string> uniqueFacetValues = new SortedSet <string>(new ComparerAnonymousHelper1()); // LUCENENET NOTE: Need HashMap here because of null keys IDictionary <string, HashMap <string, ISet <string> > > searchTermToFacetToGroups = new Dictionary <string, HashMap <string, ISet <string> > >(); int facetWithMostGroups = 0; for (int i = 0; i < numDocs; i++) { string groupValue; if (random.nextInt(24) == 17) { // So we test the "doc doesn't have the group'd // field" case: if (useDv) { groupValue = ""; } else { groupValue = null; } } else { groupValue = groups[random.nextInt(groups.size())]; } string contentStr = contentBrs[random.nextInt(contentBrs.Length)]; if (!searchTermToFacetToGroups.ContainsKey(contentStr)) { searchTermToFacetToGroups[contentStr] = new HashMap <string, ISet <string> >(); } IDictionary <string, ISet <string> > facetToGroups = searchTermToFacetToGroups[contentStr]; List <string> facetVals = new List <string>(); if (useDv || random.nextInt(24) != 18) { if (useDv) { string facetValue = facetValues[random.nextInt(facetValues.size())]; uniqueFacetValues.Add(facetValue); if (!facetToGroups.ContainsKey(facetValue)) { facetToGroups[facetValue] = new HashSet <string>(); } ISet <string> groupsInFacet = facetToGroups[facetValue]; groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } facetFields[0].SetStringValue(facetValue); facetFields[1].SetBytesValue(new BytesRef(facetValue)); facetVals.Add(facetValue); } else { foreach (Field facetField in facetFields) { string facetValue = facetValues[random.nextInt(facetValues.size())]; uniqueFacetValues.Add(facetValue); if (!facetToGroups.ContainsKey(facetValue)) { facetToGroups[facetValue] = new HashSet <string>(); } ISet <string> groupsInFacet = facetToGroups[facetValue]; groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } facetField.SetStringValue(facetValue); facetVals.Add(facetValue); } } } else { uniqueFacetValues.Add(null); if (!facetToGroups.ContainsKey(null)) { facetToGroups.Put(null, new HashSet <string>()); } ISet <string> groupsInFacet = facetToGroups[null]; groupsInFacet.add(groupValue); if (groupsInFacet.size() > facetWithMostGroups) { facetWithMostGroups = groupsInFacet.size(); } } if (VERBOSE) { Console.WriteLine(" doc content=" + contentStr + " group=" + (groupValue == null ? "null" : groupValue) + " facetVals=" + facetVals); } if (groupValue != null) { if (useDv) { groupDc.SetBytesValue(new BytesRef(groupValue)); } group.SetStringValue(groupValue); } else if (useDv) { // DV cannot have missing values: groupDc.SetBytesValue(new BytesRef()); } content.SetStringValue(contentStr); if (groupValue == null && !facetVals.Any()) { writer.AddDocument(docNoGroupNoFacet); } else if (!facetVals.Any()) { writer.AddDocument(docNoFacet); } else if (groupValue == null) { writer.AddDocument(docNoGroup); } else { writer.AddDocument(doc); } } DirectoryReader reader = writer.GetReader(); writer.Dispose(); return(new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv)); }
public void TestRandom() { Random random = Random; int numberOfRuns = TestUtil.NextInt32(random, 3, 6); for (int indexIter = 0; indexIter < numberOfRuns; indexIter++) { bool multipleFacetsPerDocument = random.nextBoolean(); IndexContext context = CreateIndexContext(multipleFacetsPerDocument); IndexSearcher searcher = NewSearcher(context.indexReader); if (VERBOSE) { Console.WriteLine("TEST: searcher=" + searcher); } for (int searchIter = 0; searchIter < 100; searchIter++) { if (VERBOSE) { Console.WriteLine("TEST: searchIter=" + searchIter); } bool useDv = !multipleFacetsPerDocument && context.useDV && random.nextBoolean(); string searchTerm = context.contentStrings[random.nextInt(context.contentStrings.Length)]; int limit = random.nextInt(context.facetValues.size()); int offset = random.nextInt(context.facetValues.size() - limit); int size = offset + limit; int minCount = random.nextBoolean() ? 0 : random.nextInt(1 + context.facetWithMostGroups / 10); bool orderByCount = random.nextBoolean(); string randomStr = GetFromSet(context.facetValues, random.nextInt(context.facetValues.size())); string facetPrefix; if (randomStr == null) { facetPrefix = null; } else { int codePointLen = randomStr.CodePointCount(0, randomStr.Length); int randomLen = random.nextInt(codePointLen); if (codePointLen == randomLen - 1) { facetPrefix = null; } else { int end = randomStr.OffsetByCodePoints(0, randomLen); facetPrefix = random.nextBoolean() ? null : randomStr.Substring(end); } } GroupedFacetResult expectedFacetResult = CreateExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix); AbstractGroupFacetCollector groupFacetCollector = CreateRandomCollector(useDv ? "group_dv" : "group", useDv ? "facet_dv" : "facet", facetPrefix, multipleFacetsPerDocument); searcher.Search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector); TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.MergeSegmentResults(size, minCount, orderByCount); IList <TermGroupFacetCollector.FacetEntry> expectedFacetEntries = expectedFacetResult.GetFacetEntries(); IList <TermGroupFacetCollector.FacetEntry> actualFacetEntries = actualFacetResult.GetFacetEntries(offset, limit); if (VERBOSE) { Console.WriteLine("Use DV: " + useDv); Console.WriteLine("Collector: " + groupFacetCollector.GetType().Name); Console.WriteLine("Num group: " + context.numGroups); Console.WriteLine("Num doc: " + context.numDocs); Console.WriteLine("Index iter: " + indexIter); Console.WriteLine("multipleFacetsPerDocument: " + multipleFacetsPerDocument); Console.WriteLine("Search iter: " + searchIter); Console.WriteLine("Search term: " + searchTerm); Console.WriteLine("Min count: " + minCount); Console.WriteLine("Facet offset: " + offset); Console.WriteLine("Facet limit: " + limit); Console.WriteLine("Facet prefix: " + facetPrefix); Console.WriteLine("Order by count: " + orderByCount); Console.WriteLine("\n=== Expected: \n"); Console.WriteLine("Total count " + expectedFacetResult.TotalCount); Console.WriteLine("Total missing count " + expectedFacetResult.TotalMissingCount); int counter = 0; foreach (TermGroupFacetCollector.FacetEntry expectedFacetEntry in expectedFacetEntries) { Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "{0}. 
Expected facet value {1} with count {2}", counter++, expectedFacetEntry.Value.Utf8ToString(), expectedFacetEntry.Count ) ); } Console.WriteLine("\n=== Actual: \n"); Console.WriteLine("Total count " + actualFacetResult.TotalCount); Console.WriteLine("Total missing count " + actualFacetResult.TotalMissingCount); counter = 0; foreach (TermGroupFacetCollector.FacetEntry actualFacetEntry in actualFacetEntries) { Console.WriteLine( string.Format(CultureInfo.InvariantCulture, "{0}. Actual facet value {1} with count {2}", counter++, actualFacetEntry.Value.Utf8ToString(), actualFacetEntry.Count ) ); } Console.WriteLine("\n==================================================================================="); } assertEquals(expectedFacetResult.TotalCount, actualFacetResult.TotalCount); assertEquals(expectedFacetResult.TotalMissingCount, actualFacetResult.TotalMissingCount); assertEquals(expectedFacetEntries.size(), actualFacetEntries.size()); for (int i = 0; i < expectedFacetEntries.size(); i++) { TermGroupFacetCollector.FacetEntry expectedFacetEntry = expectedFacetEntries[i]; TermGroupFacetCollector.FacetEntry actualFacetEntry = actualFacetEntries[i]; assertEquals("i=" + i + ": " + expectedFacetEntry.Value.Utf8ToString() + " != " + actualFacetEntry.Value.Utf8ToString(), expectedFacetEntry.Value, actualFacetEntry.Value); assertEquals("i=" + i + ": " + expectedFacetEntry.Count + " != " + actualFacetEntry.Count, expectedFacetEntry.Count, actualFacetEntry.Count); } } context.indexReader.Dispose(); context.dir.Dispose(); } }
public virtual void Test2() { Random random = Random(); int NUM_DOCS = AtLeast(100); Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, Similarity, TimeZone); bool allowDups = random.NextBoolean(); HashSet <string> seen = new HashSet <string>(); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; IList <BytesRef> docValues = new List <BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { string s; if (random.NextBoolean()) { s = TestUtil.RandomSimpleString(random); } else { s = TestUtil.RandomUnicodeString(random); } BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.Contains(s)) { continue; } seen.Add(s); } if (VERBOSE) { Console.WriteLine(" " + numDocs + ": s=" + s); } Document doc = new Document(); doc.Add(new SortedDocValuesField("stringdv", br)); doc.Add(new NumericDocValuesField("id", numDocs)); docValues.Add(br); writer.AddDocument(doc); numDocs++; if (random.Next(40) == 17) { // force flush writer.Reader.Dispose(); } } writer.ForceMerge(1); DirectoryReader r = writer.Reader; writer.Dispose(); AtomicReader sr = GetOnlySegmentReader(r); long END_TIME = Environment.TickCount + (TEST_NIGHTLY ? 30 : 1); int NUM_THREADS = TestUtil.NextInt(Random(), 1, 10); ThreadClass[] threads = new ThreadClass[NUM_THREADS]; for (int thread = 0; thread < NUM_THREADS; thread++) { threads[thread] = new ThreadAnonymousInnerClassHelper2(random, docValues, sr, END_TIME); threads[thread].Start(); } foreach (ThreadClass thread in threads) { thread.Join(); } r.Dispose(); dir.Dispose(); }
// FST is complete private void VerifyUnPruned(int inputMode, FST <T> fst) { FST <long?> fstLong; ISet <long?> validOutputs; long minLong = long.MaxValue; long maxLong = long.MinValue; if (doReverseLookup) { FST <long?> fstLong0 = fst as FST <long?>; fstLong = fstLong0; validOutputs = new HashSet <long?>(); foreach (InputOutput <T> pair in pairs) { long?output = pair.Output as long?; maxLong = Math.Max(maxLong, output.Value); minLong = Math.Min(minLong, output.Value); validOutputs.Add(output.Value); } } else { fstLong = null; validOutputs = null; } if (pairs.Count == 0) { Assert.IsNull(fst); return; } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify " + pairs.Count + " terms"); foreach (InputOutput <T> pair in pairs) { Assert.IsNotNull(pair); Assert.IsNotNull(pair.Input); Assert.IsNotNull(pair.Output); Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output)); } } Assert.IsNotNull(fst); // visit valid pairs in order -- make sure all words // are accepted, and FSTEnum's next() steps through // them correctly if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check valid terms/next()"); } { Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst); foreach (InputOutput <T> pair in pairs) { Int32sRef term = pair.Input; if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check term=" + InputToString(inputMode, term) + " output=" + fst.Outputs.OutputToString(pair.Output)); } T output = Run(fst, term, null); Assert.IsNotNull(output, "term " + InputToString(inputMode, term) + " is not accepted"); Assert.IsTrue(OutputsEqual(pair.Output, output)); // verify enum's next Int32sRefFSTEnum.InputOutput <T> t = fstEnum.Next(); Assert.IsNotNull(t); Assert.AreEqual(term, t.Input, "expected input=" + InputToString(inputMode, term) + " but fstEnum returned " + InputToString(inputMode, t.Input)); Assert.IsTrue(OutputsEqual(pair.Output, t.Output)); } Assert.IsNull(fstEnum.Next()); } IDictionary <Int32sRef, T> termsMap = new Dictionary <Int32sRef, T>(); foreach (InputOutput <T> pair in pairs) { termsMap[pair.Input] = pair.Output; } if (doReverseLookup && maxLong > minLong) { // Do random lookups so we test null (output doesn't // exist) case: Assert.IsNull(Util.GetByOutput(fstLong, minLong - 7)); Assert.IsNull(Util.GetByOutput(fstLong, maxLong + 7)); int num = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num; iter++) { long v = TestUtil.NextInt64(random, minLong, maxLong); Int32sRef input = Util.GetByOutput(fstLong, v); Assert.IsTrue(validOutputs.Contains(v) || input == null); } } // find random matching word and make sure it's valid if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify random accepted terms"); } Int32sRef scratch = new Int32sRef(10); int num_ = LuceneTestCase.AtLeast(random, 500); for (int iter = 0; iter < num_; iter++) { T output = RandomAcceptedWord(fst, scratch); Assert.IsTrue(termsMap.ContainsKey(scratch), "accepted word " + InputToString(inputMode, scratch) + " is not valid"); Assert.IsTrue(OutputsEqual(termsMap[scratch], output)); if (doReverseLookup) { //System.out.println("lookup output=" + output + " outs=" + fst.Outputs); Int32sRef input = Util.GetByOutput(fstLong, (output as long?).Value); Assert.IsNotNull(input); //System.out.println(" got " + Util.toBytesRef(input, new BytesRef()).utf8ToString()); Assert.AreEqual(scratch, input); } } // test IntsRefFSTEnum.Seek: if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify seek"); } Int32sRefFSTEnum <T> fstEnum_ = new 
Int32sRefFSTEnum <T>(fst); num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" iter=" + iter); } if (random.NextBoolean()) { // seek to term that doesn't exist: while (true) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); if (pos < 0) { pos = -(pos + 1); // ok doesn't exist //System.out.println(" seek " + inputToString(inputMode, term)); Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 0) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekExact term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekExact(term); pos = -1; } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekFloor(term); pos--; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil term=" + InputToString(inputMode, term)); } seekResult = fstEnum_.SeekCeil(term); } if (pos != -1 && pos < pairs.Count) { //System.out.println(" got " + inputToString(inputMode,seekResult.input) + " output=" + fst.Outputs.outputToString(seekResult.Output)); Assert.IsNotNull(seekResult, "got null but expected term=" + InputToString(inputMode, pairs[pos].Input)); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got " + InputToString(inputMode, seekResult.Input)); } Assert.AreEqual(pairs[pos].Input, seekResult.Input, "expected " + InputToString(inputMode, pairs[pos].Input) + " but got " + InputToString(inputMode, seekResult.Input)); Assert.IsTrue(OutputsEqual(pairs[pos].Output, seekResult.Output)); } else { // seeked before start or beyond end //System.out.println("seek=" + seekTerm); Assert.IsNull(seekResult, "expected null but got " + (seekResult == null ? 
"null" : InputToString(inputMode, seekResult.Input))); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" got null"); } } break; } } } else { // seek to term that does exist: InputOutput <T> pair = pairs[random.Next(pairs.Count)]; Int32sRefFSTEnum.InputOutput <T> seekResult; if (random.Next(3) == 2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekExact term=" + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekExact(pair.Input); } else if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekFloor " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekFloor(pair.Input); } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do exists seekCeil " + InputToString(inputMode, pair.Input)); } seekResult = fstEnum_.SeekCeil(pair.Input); } Assert.IsNotNull(seekResult); Assert.AreEqual(pair.Input, seekResult.Input, "got " + InputToString(inputMode, seekResult.Input) + " but expected " + InputToString(inputMode, pair.Input)); Assert.IsTrue(OutputsEqual(pair.Output, seekResult.Output)); } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: mixed next/seek"); } // test mixed next/seek num_ = LuceneTestCase.AtLeast(random, 100); for (int iter = 0; iter < num_; iter++) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: iter " + iter); } // reset: fstEnum_ = new Int32sRefFSTEnum <T>(fst); int upto = -1; while (true) { bool isDone = false; if (upto == pairs.Count - 1 || random.NextBoolean()) { // next upto++; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do next"); } isDone = fstEnum_.Next() == null; } else if (upto != -1 && upto < 0.75 * pairs.Count && random.NextBoolean()) { int attempt = 0; for (; attempt < 10; attempt++) { Int32sRef term = ToInt32sRef(GetRandomString(random), inputMode); if (!termsMap.ContainsKey(term) && term.CompareTo(pairs[upto].Input) > 0) { int pos = pairs.BinarySearch(new InputOutput <T>(term, default(T))); Debug.Assert(pos < 0); upto = -(pos + 1); if (random.NextBoolean()) { upto--; Assert.IsTrue(upto != -1); if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekFloor(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekFloor(term) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do non-exist seekCeil(" + InputToString(inputMode, term) + ")"); } isDone = fstEnum_.SeekCeil(term) == null; } break; } } if (attempt == 10) { continue; } } else { int inc = random.Next(pairs.Count - upto - 1); upto += inc; if (upto == -1) { upto = 0; } if (random.NextBoolean()) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekCeil(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekCeil(pairs[upto].Input) == null; } else { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" do seekFloor(" + InputToString(inputMode, pairs[upto].Input) + ")"); } isDone = fstEnum_.SeekFloor(pairs[upto].Input) == null; } } if (LuceneTestCase.VERBOSE) { if (!isDone) { Console.WriteLine(" got " + InputToString(inputMode, fstEnum_.Current.Input)); } else { Console.WriteLine(" got null"); } } if (upto == pairs.Count) { Assert.IsTrue(isDone); break; } else { Assert.IsFalse(isDone); Assert.AreEqual(pairs[upto].Input, fstEnum_.Current.Input); Assert.IsTrue(OutputsEqual(pairs[upto].Output, fstEnum_.Current.Output)); /* * if (upto < pairs.size()-1) { * int tryCount = 0; * while(tryCount < 10) { * final IntsRef t = toIntsRef(getRandomString(), inputMode); * if (pairs.get(upto).input.compareTo(t) < 0) { * final boolean expected = 
t.compareTo(pairs.get(upto+1).input) < 0; * if (LuceneTestCase.VERBOSE) { * System.out.println("TEST: call beforeNext(" + inputToString(inputMode, t) + "); current=" + inputToString(inputMode, pairs.get(upto).input) + " next=" + inputToString(inputMode, pairs.get(upto+1).input) + " expected=" + expected); * } * Assert.AreEqual(expected, fstEnum.beforeNext(t)); * break; * } * tryCount++; * } * } */ } } } }
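// Summary sketch of the enum seek contract verified above (assuming Int32sRefFSTEnum semantics
// from Lucene 4.8): SeekExact returns null unless the exact term exists; SeekFloor returns the
// greatest term <= the target (null if none precede it); SeekCeil returns the smallest term
// >= the target (null once past the end). Helper name and the long? output type are ours.
private static Int32sRefFSTEnum.InputOutput<long?> SeekFloorSketch(FST<long?> fst, Int32sRef target)
{
    Int32sRefFSTEnum<long?> fstEnum = new Int32sRefFSTEnum<long?>(fst);
    return fstEnum.SeekFloor(target); // null when every term in the FST is > target
}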
public virtual void TestNoWaitClose()
{
    Directory directory = NewDirectory();
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.IsTokenized = false;
    Field idField = NewField("id", "", customType);
    doc.Add(idField);

    for (int pass = 0; pass < 2; pass++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: pass=" + pass);
        }

        IndexWriterConfig conf = (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
            .SetOpenMode(OpenMode.CREATE)
            .SetMaxBufferedDocs(2)
            .SetMergePolicy(NewLogMergePolicy());
        IndexWriter writer = new IndexWriter(directory, conf);
        ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 100;

        for (int iter = 0; iter < 10; iter++)
        {
            if (VERBOSE)
            {
                Console.WriteLine("TEST: iter=" + iter);
            }
            for (int j = 0; j < 199; j++)
            {
                idField.SetStringValue(Convert.ToString(iter * 201 + j));
                writer.AddDocument(doc);
            }

            int delID = iter * 199;
            for (int j = 0; j < 20; j++)
            {
                writer.DeleteDocuments(new Term("id", Convert.ToString(delID)));
                delID += 5;
            }

            // Force a bunch of merge threads to kick off so we
            // stress out aborting them on close:
            ((LogMergePolicy)writer.Config.MergePolicy).MergeFactor = 2;

            IndexWriter finalWriter = writer;
            List<Exception> failure = new List<Exception>();
            ThreadClass t1 = new ThreadAnonymousInnerClassHelper(this, doc, finalWriter, failure);

            if (failure.Count > 0)
            {
                throw failure[0];
            }

            t1.Start();

            writer.Dispose(false);
            t1.Join();

            // Make sure reader can read
            IndexReader reader = DirectoryReader.Open(directory);
            reader.Dispose();

            // Reopen
            writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.APPEND).SetMergePolicy(NewLogMergePolicy()));
        }
        writer.Dispose();
    }

    directory.Dispose();
}
// FST is pruned private void VerifyPruned(int inputMode, FST <T> fst, int prune1, int prune2) { if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now verify pruned " + pairs.Count + " terms; outputs=" + outputs); foreach (InputOutput <T> pair in pairs) { Console.WriteLine(" " + InputToString(inputMode, pair.Input) + ": " + outputs.OutputToString(pair.Output)); } } // To validate the FST, we brute-force compute all prefixes // in the terms, matched to their "common" outputs, prune that // set according to the prune thresholds, then assert the FST // matches that same set. // NOTE: Crazy RAM intensive!! //System.out.println("TEST: tally prefixes"); // build all prefixes IDictionary <Int32sRef, CountMinOutput <T> > prefixes = new HashMap <Int32sRef, CountMinOutput <T> >(); Int32sRef scratch = new Int32sRef(10); foreach (InputOutput <T> pair in pairs) { scratch.CopyInt32s(pair.Input); for (int idx = 0; idx <= pair.Input.Length; idx++) { scratch.Length = idx; CountMinOutput <T> cmo = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null; if (cmo == null) { cmo = new CountMinOutput <T>(); cmo.Count = 1; cmo.Output = pair.Output; prefixes[Int32sRef.DeepCopyOf(scratch)] = cmo; } else { cmo.Count++; T output1 = cmo.Output; if (output1.Equals(outputs.NoOutput)) { output1 = outputs.NoOutput; } T output2 = pair.Output; if (output2.Equals(outputs.NoOutput)) { output2 = outputs.NoOutput; } cmo.Output = outputs.Common(output1, output2); } if (idx == pair.Input.Length) { cmo.IsFinal = true; cmo.FinalOutput = cmo.Output; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: now prune"); } // prune 'em // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since // in .NET you cannot delete records while enumerating forward through a dictionary. for (int i = prefixes.Count - 1; i >= 0; i--) { KeyValuePair <Int32sRef, CountMinOutput <T> > ent = prefixes.ElementAt(i); Int32sRef prefix = ent.Key; CountMinOutput <T> cmo = ent.Value; if (LuceneTestCase.VERBOSE) { Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal); } bool keep; if (prune1 > 0) { keep = cmo.Count >= prune1; } else { Debug.Assert(prune2 > 0); if (prune2 > 1 && cmo.Count >= prune2) { keep = true; } else if (prefix.Length > 0) { // consult our parent scratch.Length = prefix.Length - 1; Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length); CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? prefixes[scratch] : null; //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1))); } else if (cmo.Count >= prune2) { keep = true; } else { keep = false; } } if (!keep) { prefixes.Remove(prefix); //System.out.println(" remove"); } else { // clear isLeaf for all ancestors //System.out.println(" keep"); scratch.CopyInt32s(prefix); scratch.Length--; while (scratch.Length >= 0) { CountMinOutput <T> cmo2 = prefixes.ContainsKey(scratch) ? 
prefixes[scratch] : null; if (cmo2 != null) { //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); cmo2.IsLeaf = false; } scratch.Length--; } } } if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: after prune"); foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes) { Console.WriteLine(" " + InputToString(inputMode, ent.Key, false) + ": isLeaf=" + ent.Value.IsLeaf + " isFinal=" + ent.Value.IsFinal); if (ent.Value.IsFinal) { Console.WriteLine(" finalOutput=" + outputs.OutputToString(ent.Value.FinalOutput)); } } } if (prefixes.Count <= 1) { Assert.IsNull(fst); return; } Assert.IsNotNull(fst); // make sure FST only enums valid prefixes if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: check pruned enum"); } Int32sRefFSTEnum <T> fstEnum = new Int32sRefFSTEnum <T>(fst); Int32sRefFSTEnum.InputOutput <T> current; while ((current = fstEnum.Next()) != null) { if (LuceneTestCase.VERBOSE) { Console.WriteLine(" fstEnum.next prefix=" + InputToString(inputMode, current.Input, false) + " output=" + outputs.OutputToString(current.Output)); } CountMinOutput <T> cmo = prefixes.ContainsKey(current.Input) ? prefixes[current.Input] : null; Assert.IsNotNull(cmo); Assert.IsTrue(cmo.IsLeaf || cmo.IsFinal); //if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, current.Output); } else { Assert.AreEqual(cmo.Output, current.Output); } } // make sure all non-pruned prefixes are present in the FST if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify all prefixes"); } int[] stopNode = new int[1]; foreach (KeyValuePair <Int32sRef, CountMinOutput <T> > ent in prefixes) { if (ent.Key.Length > 0) { CountMinOutput <T> cmo = ent.Value; T output = Run(fst, ent.Key, stopNode); if (LuceneTestCase.VERBOSE) { Console.WriteLine("TEST: verify prefix=" + InputToString(inputMode, ent.Key, false) + " output=" + outputs.OutputToString(cmo.Output)); } // if (cmo.isFinal && !cmo.isLeaf) { if (cmo.IsFinal) { Assert.AreEqual(cmo.FinalOutput, output); } else { Assert.AreEqual(cmo.Output, output); } Assert.AreEqual(ent.Key.Length, stopNode[0]); } } }
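// VerifyPruned re-derives the pruned FST contents by brute force: tally every
// prefix of every input, then apply the prune1/prune2 thresholds to the
// tallies. A stripped-down sketch of the tally step over int sequences
// (hypothetical helper; the real code keys on Int32sRef and also tracks the
// "common" output per prefix):
private static Dictionary<string, int> CountPrefixes(IEnumerable<int[]> inputs)
{
    var counts = new Dictionary<string, int>();
    foreach (int[] input in inputs)
    {
        string key = "";
        counts.TryGetValue(key, out int c0);
        counts[key] = c0 + 1; // the empty prefix is counted once per input
        for (int len = 0; len < input.Length; len++)
        {
            key = key.Length == 0 ? input[len].ToString() : key + "," + input[len];
            counts.TryGetValue(key, out int c);
            counts[key] = c + 1;
        }
    }
    return counts; // prune thresholds are then applied to these counts
}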
public virtual void TestAccquireReleaseRace() { DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl(); ctrl.UpdateStalled(false); AtomicBoolean stop = new AtomicBoolean(false); AtomicBoolean checkPoint = new AtomicBoolean(true); int numStallers = AtLeast(1); int numReleasers = AtLeast(1); int numWaiters = AtLeast(1); var sync = new Synchronizer(numStallers + numReleasers, numStallers + numReleasers + numWaiters); var threads = new ThreadClass[numReleasers + numStallers + numWaiters]; IList <Exception> exceptions = new SynchronizedList <Exception>(); for (int i = 0; i < numReleasers; i++) { threads[i] = new Updater(stop, checkPoint, ctrl, sync, true, exceptions); } for (int i = numReleasers; i < numReleasers + numStallers; i++) { threads[i] = new Updater(stop, checkPoint, ctrl, sync, false, exceptions); } for (int i = numReleasers + numStallers; i < numReleasers + numStallers + numWaiters; i++) { threads[i] = new Waiter(stop, checkPoint, ctrl, sync, exceptions); } Start(threads); int iters = AtLeast(10000); float checkPointProbability = TEST_NIGHTLY ? 0.5f : 0.1f; for (int i = 0; i < iters; i++) { if (checkPoint.Get()) { Assert.IsTrue(sync.UpdateJoin.Wait(new TimeSpan(0, 0, 0, 10)), "timed out waiting for update threads - deadlock?"); if (exceptions.Count > 0) { foreach (Exception throwable in exceptions) { Console.WriteLine(throwable.ToString()); Console.Write(throwable.StackTrace); } Assert.Fail("got exceptions in threads"); } if (ctrl.HasBlocked && ctrl.IsHealthy) { AssertState(numReleasers, numStallers, numWaiters, threads, ctrl); } checkPoint.Set(false); sync.Waiter.Signal(); sync.LeftCheckpoint.Wait(); } Assert.IsFalse(checkPoint.Get()); Assert.AreEqual(0, sync.Waiter.CurrentCount); if (checkPointProbability >= (float)Random().NextDouble()) { sync.Reset(numStallers + numReleasers, numStallers + numReleasers + numWaiters); checkPoint.Set(true); } } if (!checkPoint.Get()) { sync.Reset(numStallers + numReleasers, numStallers + numReleasers + numWaiters); checkPoint.Set(true); } Assert.IsTrue(sync.UpdateJoin.Wait(new TimeSpan(0, 0, 0, 10))); AssertState(numReleasers, numStallers, numWaiters, threads, ctrl); checkPoint.Set(false); stop.Set(true); sync.Waiter.Signal(); sync.LeftCheckpoint.Wait(); for (int i = 0; i < threads.Length; i++) { ctrl.UpdateStalled(false); threads[i].Join(2000); if (threads[i].IsAlive && threads[i] is Waiter) { if (threads[i].State == ThreadState.WaitSleepJoin) { Assert.Fail("waiter is not released - anyThreadsStalled: " + ctrl.AnyStalledThreads()); } } } }
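// The Synchronizer above coordinates three thread roles (stallers, releasers,
// waiters) through repeated checkpoints. The essential mechanism is a pair of
// countdown latches plus a "go" signal; a hedged sketch of that shape (names
// and layout are illustrative, not the actual test helper):
internal sealed class CheckpointSyncSketch
{
    internal CountdownEvent UpdateJoin;     // updaters signal arrival at the checkpoint
    internal CountdownEvent LeftCheckpoint; // every thread signals when it moves on
    internal CountdownEvent Waiter = new CountdownEvent(1); // main thread releases everyone

    internal CheckpointSyncSketch(int updaters, int all)
    {
        UpdateJoin = new CountdownEvent(updaters);
        LeftCheckpoint = new CountdownEvent(all);
    }
}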
public virtual void Test2BTerms_Mem([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { if ("Lucene3x".Equals(Codec.Default.Name, StringComparison.Ordinal)) { throw new Exception("this test cannot run with PreFlex codec"); } Console.WriteLine("Starting Test2B"); long TERM_COUNT = ((long)int.MaxValue) + 100000000; int TERMS_PER_DOC = TestUtil.NextInt(Random(), 100000, 1000000); IList <BytesRef> savedTerms = null; BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BTerms")); //MockDirectoryWrapper dir = NewFSDirectory(new File("/p/lucene/indices/2bindex")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } dir.CheckIndexOnClose = false; // don't double-checkindex if (true) { IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE)); MergePolicy mp = w.Config.MergePolicy; if (mp is LogByteSizeMergePolicy) { // 1 petabyte: ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1024 * 1024 * 1024; } Documents.Document doc = new Documents.Document(); MyTokenStream ts = new MyTokenStream(Random(), TERMS_PER_DOC); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.IndexOptions = IndexOptions.DOCS_ONLY; customType.OmitNorms = true; Field field = new Field("field", ts, customType); doc.Add(field); //w.setInfoStream(System.out); int numDocs = (int)(TERM_COUNT / TERMS_PER_DOC); Console.WriteLine("TERMS_PER_DOC=" + TERMS_PER_DOC); Console.WriteLine("numDocs=" + numDocs); for (int i = 0; i < numDocs; i++) { long t0 = Environment.TickCount; w.AddDocument(doc); Console.WriteLine(i + " of " + numDocs + " " + (Environment.TickCount - t0) + " msec"); } savedTerms = ts.SavedTerms; Console.WriteLine("TEST: full merge"); w.ForceMerge(1); Console.WriteLine("TEST: close writer"); w.Dispose(); } Console.WriteLine("TEST: open reader"); IndexReader r = DirectoryReader.Open(dir); if (savedTerms == null) { savedTerms = FindTerms(r); } int numSavedTerms = savedTerms.Count; IList <BytesRef> bigOrdTerms = new List <BytesRef>(savedTerms.SubList(numSavedTerms - 10, numSavedTerms)); Console.WriteLine("TEST: test big ord terms..."); TestSavedTerms(r, bigOrdTerms); Console.WriteLine("TEST: test all saved terms..."); TestSavedTerms(r, savedTerms); r.Dispose(); Console.WriteLine("TEST: now CheckIndex..."); CheckIndex.Status status = TestUtil.CheckIndex(dir); long tc = status.SegmentInfos[0].TermIndexStatus.TermCount; Assert.IsTrue(tc > int.MaxValue, "count " + tc + " is not > " + int.MaxValue); dir.Dispose(); Console.WriteLine("TEST: done!"); }
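// MyTokenStream above feeds the writer synthetic terms without materializing a
// giant string per document, which is what makes a >2B-term index feasible.
// A hedged sketch of such a stream (term shape and class name are illustrative;
// assumes Lucene.Net.Analysis and Lucene.Net.Analysis.TokenAttributes):
internal sealed class RandomTermsStreamSketch : TokenStream
{
    private readonly ICharTermAttribute termAtt;
    private readonly Random random;
    private readonly int termsPerDoc;
    private int emitted;

    public RandomTermsStreamSketch(Random random, int termsPerDoc)
    {
        this.random = random;
        this.termsPerDoc = termsPerDoc;
        termAtt = AddAttribute<ICharTermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (emitted == termsPerDoc) return false; // document is complete
        ClearAttributes();
        termAtt.SetEmpty().Append(random.Next().ToString("x")); // random hex term
        emitted++;
        return true;
    }

    public override void Reset()
    {
        base.Reset();
        emitted = 0; // stream is reused for every AddDocument call
    }
}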
public virtual void TestNRTAndCommit() { Directory dir = NewDirectory(); NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0); MockAnalyzer analyzer = new MockAnalyzer(Random()); analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH); IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); RandomIndexWriter w = new RandomIndexWriter(Random(), cachedDir, conf); LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues()); int numDocs = TestUtil.NextInt(Random(), 100, 400); if (VERBOSE) { Console.WriteLine("TEST: numDocs=" + numDocs); } IList <BytesRef> ids = new List <BytesRef>(); DirectoryReader r = null; for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = docs.NextDoc(); ids.Add(new BytesRef(doc.Get("docid"))); w.AddDocument(doc); if (Random().Next(20) == 17) { if (r == null) { r = DirectoryReader.Open(w.w, false); } else { DirectoryReader r2 = DirectoryReader.OpenIfChanged(r); if (r2 != null) { r.Dispose(); r = r2; } } Assert.AreEqual(1 + docCount, r.NumDocs); IndexSearcher s = NewSearcher(r); // Just make sure search can run; we can't assert // totHits since it could be 0 TopDocs hits = s.Search(new TermQuery(new Term("body", "the")), 10); // System.out.println("tot hits " + hits.totalHits); } } if (r != null) { r.Dispose(); } // Close should force cache to clear since all files are sync'd w.Dispose(); string[] cachedFiles = cachedDir.ListCachedFiles(); foreach (string file in cachedFiles) { Console.WriteLine("FAIL: cached file " + file + " remains after sync"); } Assert.AreEqual(0, cachedFiles.Length); r = DirectoryReader.Open(dir); foreach (BytesRef id in ids) { Assert.AreEqual(1, r.DocFreq(new Term("docid", id))); } r.Dispose(); cachedDir.Dispose(); docs.Dispose(); }
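// NRTCachingDirectory keeps small, freshly flushed segments in RAM and only
// writes them through to the wrapped directory on sync, which is why the test
// expects ListCachedFiles() to be empty once the writer is disposed. Typical
// construction (path is a placeholder; the thresholds match the test's 2.0/25.0):
private Directory OpenNrtDirSketch(string path)
{
    Directory backing = FSDirectory.Open(path); // on-disk backing store
    return new NRTCachingDirectory(backing,
        2.0,   // maxMergeSizeMB: merges larger than this bypass the RAM cache
        25.0); // maxCachedMB: total RAM budget for cached files
}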
public virtual void TestL2OBasic() { LabelToOrdinal map = new LabelToOrdinalMap(); CompactLabelToOrdinal compact = new CompactLabelToOrdinal(200, 0.15f, 3); int n = 50; string[] uniqueValues = new string[] { @"�", @"�r�G��F�\u0382�7\u0019�h�\u0015���#\u001d3\r{��q�_���Ԃ������", "foo bar one", new string(new char[] { (char)65533, (char)65533, (char)65, (char)65533, (char)45, (char)106, (char)40, (char)643, (char)65533, (char)11, (char)65533, (char)88, (char)65533, (char)78, (char)126, (char)56, (char)12, (char)71 }), "foo bar two", "foo bar three", "foo bar four", "foo bar five", "foo bar six", "foo bar seven", "foo bar eight", "foo bar nine", "foo bar ten", "foo/bar/one", "foo/bar/two", "foo/bar/three", "foo/bar/four", "foo/bar/five", "foo/bar/six", "foo/bar/seven", "foo/bar/eight", "foo/bar/nine", "foo/bar/ten", "" }; var tmpDir = CreateTempDir("testLableToOrdinal"); var f = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp")); int flushInterval = 10; for (int i = 0; i < n; i++) { if (i > 0 && i % flushInterval == 0) { using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite)) { compact.Flush(fileStream); } compact = CompactLabelToOrdinal.Open(f, 0.15f, 3); //assertTrue(f.Delete()); f.Delete(); assertFalse(File.Exists(f.FullName)); if (flushInterval < (n / 10)) { flushInterval *= 10; } } FacetLabel label = new FacetLabel(); foreach (string s in uniqueValues) { if (s.Length == 0) { label = new FacetLabel(); } else { label = new FacetLabel(s.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)); } int ord1 = map.GetOrdinal(label); int ord2 = compact.GetOrdinal(label); if (VERBOSE) { Console.WriteLine("Testing label: " + label.ToString()); } assertEquals(ord1, ord2); if (ord1 == LabelToOrdinal.INVALID_ORDINAL) { ord1 = compact.GetNextOrdinal(); map.AddLabel(label, ord1); compact.AddLabel(label, ord1); } } } for (int i = 0; i < uniqueValues.Length; i++) { FacetLabel label; string s = uniqueValues[i]; if (s.Length == 0) { label = new FacetLabel(); } else { label = new FacetLabel(s.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)); } int ord1 = map.GetOrdinal(label); int ord2 = compact.GetOrdinal(label); if (VERBOSE) { Console.WriteLine("Testing label 2: " + label.ToString()); } assertEquals(ord1, ord2); } }
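// Labels in these tests are paths: the string is split on '/' and empty
// components are dropped, so "foo/bar/one" and "foo//bar/one/" produce the
// same FacetLabel. The conversion used above, isolated:
private static FacetLabel ToLabel(string s)
{
    return s.Length == 0
        ? new FacetLabel() // the empty (root) label
        : new FacetLabel(s.Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries));
}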
public override void SetUp() { base.SetUp(); // LUCENENET specific: Moved this logic here to ensure that it is executed // after the class is setup - a field is way too early to execute this. bool supportsDocValues = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal) == false; AllSortFields = new List <SortField>(Arrays.AsList(new SortField[] { #pragma warning disable 612,618 new SortField("byte", SortFieldType.BYTE, false), new SortField("short", SortFieldType.INT16, false), #pragma warning restore 612,618 new SortField("int", SortFieldType.INT32, false), new SortField("long", SortFieldType.INT64, false), new SortField("float", SortFieldType.SINGLE, false), new SortField("double", SortFieldType.DOUBLE, false), new SortField("bytes", SortFieldType.STRING, false), new SortField("bytesval", SortFieldType.STRING_VAL, false), #pragma warning disable 612,618 new SortField("byte", SortFieldType.BYTE, true), new SortField("short", SortFieldType.INT16, true), #pragma warning restore 612,618 new SortField("int", SortFieldType.INT32, true), new SortField("long", SortFieldType.INT64, true), new SortField("float", SortFieldType.SINGLE, true), new SortField("double", SortFieldType.DOUBLE, true), new SortField("bytes", SortFieldType.STRING, true), new SortField("bytesval", SortFieldType.STRING_VAL, true), SortField.FIELD_SCORE, SortField.FIELD_DOC })); if (supportsDocValues) { AllSortFields.AddRange(Arrays.AsList(new SortField[] { new SortField("intdocvalues", SortFieldType.INT32, false), new SortField("floatdocvalues", SortFieldType.SINGLE, false), new SortField("sortedbytesdocvalues", SortFieldType.STRING, false), new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, false), new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, false), new SortField("intdocvalues", SortFieldType.INT32, true), new SortField("floatdocvalues", SortFieldType.SINGLE, true), new SortField("sortedbytesdocvalues", SortFieldType.STRING, true), new SortField("sortedbytesdocvaluesval", SortFieldType.STRING_VAL, true), new SortField("straightbytesdocvalues", SortFieldType.STRING_VAL, true) })); } // Also test missing first / last for the "string" sorts: foreach (string field in new string[] { "bytes", "sortedbytesdocvalues" }) { for (int rev = 0; rev < 2; rev++) { bool reversed = rev == 0; SortField sf = new SortField(field, SortFieldType.STRING, reversed); sf.MissingValue = SortField.STRING_FIRST; AllSortFields.Add(sf); sf = new SortField(field, SortFieldType.STRING, reversed); sf.MissingValue = SortField.STRING_LAST; AllSortFields.Add(sf); } } int limit = AllSortFields.Count; for (int i = 0; i < limit; i++) { SortField sf = AllSortFields[i]; if (sf.Type == SortFieldType.INT32) { SortField sf2 = new SortField(sf.Field, SortFieldType.INT32, sf.IsReverse); sf2.MissingValue = Random.Next(); AllSortFields.Add(sf2); } else if (sf.Type == SortFieldType.INT64) { SortField sf2 = new SortField(sf.Field, SortFieldType.INT64, sf.IsReverse); sf2.MissingValue = Random.NextInt64(); AllSortFields.Add(sf2); } else if (sf.Type == SortFieldType.SINGLE) { SortField sf2 = new SortField(sf.Field, SortFieldType.SINGLE, sf.IsReverse); sf2.MissingValue = (float)Random.NextDouble(); AllSortFields.Add(sf2); } else if (sf.Type == SortFieldType.DOUBLE) { SortField sf2 = new SortField(sf.Field, SortFieldType.DOUBLE, sf.IsReverse); sf2.MissingValue = Random.NextDouble(); AllSortFields.Add(sf2); } } Dir = NewDirectory(); RandomIndexWriter iw = new RandomIndexWriter(Random, Dir, Similarity, TimeZone); int numDocs =
AtLeast(200); for (int i = 0; i < numDocs; i++) { IList <Field> fields = new List <Field>(); fields.Add(NewTextField("english", English.Int32ToEnglish(i), Field.Store.NO)); fields.Add(NewTextField("oddeven", (i % 2 == 0) ? "even" : "odd", Field.Store.NO)); fields.Add(NewStringField("byte", "" + ((sbyte)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); fields.Add(NewStringField("short", "" + ((short)Random.Next()).ToString(CultureInfo.InvariantCulture), Field.Store.NO)); fields.Add(new Int32Field("int", Random.Next(), Field.Store.NO)); fields.Add(new Int64Field("long", Random.NextInt64(), Field.Store.NO)); fields.Add(new SingleField("float", (float)Random.NextDouble(), Field.Store.NO)); fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO)); fields.Add(NewStringField("bytes", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO)); fields.Add(NewStringField("bytesval", TestUtil.RandomRealisticUnicodeString(Random), Field.Store.NO)); fields.Add(new DoubleField("double", Random.NextDouble(), Field.Store.NO)); if (supportsDocValues) { fields.Add(new NumericDocValuesField("intdocvalues", Random.Next())); fields.Add(new SingleDocValuesField("floatdocvalues", (float)Random.NextDouble())); fields.Add(new SortedDocValuesField("sortedbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random)))); fields.Add(new SortedDocValuesField("sortedbytesdocvaluesval", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random)))); fields.Add(new BinaryDocValuesField("straightbytesdocvalues", new BytesRef(TestUtil.RandomRealisticUnicodeString(Random)))); } Document document = new Document(); document.Add(new StoredField("id", "" + i)); if (isVerbose) { Console.WriteLine(" add doc id=" + i); } foreach (Field field in fields) { // So we are sometimes missing that field: if (Random.Next(5) != 4) { document.Add(field); if (isVerbose) { Console.WriteLine(" " + field); } } } iw.AddDocument(document); if (Random.Next(50) == 17) { iw.Commit(); } } Reader = iw.GetReader(); iw.Dispose(); Searcher = NewSearcher(Reader); if (isVerbose) { Console.WriteLine(" searcher=" + Searcher); } }
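// Because every field is randomly omitted from roughly a fifth of the
// documents (Random.Next(5) != 4), the MissingValue permutations built in
// SetUp are actually exercised. Typical usage against the index built above
// (field name taken from that index; query is illustrative):
private TopDocs SearchMissingFirstSketch()
{
    SortField sf = new SortField("int", SortFieldType.INT32, false);
    sf.MissingValue = int.MinValue; // docs lacking "int" sort before all real values
    return Searcher.Search(new MatchAllDocsQuery(), 10, new Sort(sf));
}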
public virtual void TestL2O() { LabelToOrdinal map = new LabelToOrdinalMap(); CompactLabelToOrdinal compact = new CompactLabelToOrdinal(2000000, 0.15f, 3); int n = AtLeast(10 * 1000); const int numUniqueValues = 50 * 1000; string[] uniqueValues = new string[numUniqueValues]; byte[] buffer = new byte[50]; // This is essentially the equivalent of // CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() // .onUnmappableCharacter(CodingErrorAction.REPLACE) // .onMalformedInput(CodingErrorAction.REPLACE); // // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage, // new EncoderReplacementFallback("?"), // new DecoderReplacementFallback("?")); Random random = Random; for (int i = 0; i < numUniqueValues;) { random.NextBytes(buffer); int size = 1 + random.Next(buffer.Length); // This test is turning random bytes into a string, // this is asking for trouble. Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage, new EncoderReplacementFallback("?"), new DecoderReplacementFallback("?")); uniqueValues[i] = decoder.GetString(buffer, 0, size); // we cannot have empty path components, so eliminate all prefix as well // as middle consecutive delimiter chars. uniqueValues[i] = Regex.Replace(uniqueValues[i], "/+", "/"); if (uniqueValues[i].StartsWith("/", StringComparison.Ordinal)) { uniqueValues[i] = uniqueValues[i].Substring(1); } if (uniqueValues[i].IndexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) { i++; } } var tmpDir = CreateTempDir("testLableToOrdinal"); var f = new FileInfo(Path.Combine(tmpDir.FullName, "CompactLabelToOrdinalTest.tmp")); int flushInterval = 10; for (int i = 0; i < n; i++) { if (i > 0 && i % flushInterval == 0) { using (var fileStream = new FileStream(f.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite)) { compact.Flush(fileStream); } compact = CompactLabelToOrdinal.Open(f, 0.15f, 3); //assertTrue(f.Delete()); f.Delete(); assertFalse(File.Exists(f.FullName)); if (flushInterval < (n / 10)) { flushInterval *= 10; } } int index = random.Next(numUniqueValues); FacetLabel label; string s = uniqueValues[index]; if (s.Length == 0) { label = new FacetLabel(); } else { label = new FacetLabel(s.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)); } int ord1 = map.GetOrdinal(label); int ord2 = compact.GetOrdinal(label); if (VERBOSE) { Console.WriteLine("Testing label: " + label.ToString()); } assertEquals(ord1, ord2); if (ord1 == LabelToOrdinal.INVALID_ORDINAL) { ord1 = compact.GetNextOrdinal(); map.AddLabel(label, ord1); compact.AddLabel(label, ord1); } } for (int i = 0; i < numUniqueValues; i++) { FacetLabel label; string s = uniqueValues[i]; if (s.Length == 0) { label = new FacetLabel(); } else { label = new FacetLabel(s.Split(new char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)); } int ord1 = map.GetOrdinal(label); int ord2 = compact.GetOrdinal(label); if (VERBOSE) { Console.WriteLine("Testing label 2: " + label.ToString()); } assertEquals(ord1, ord2); } }
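// The normalization above guarantees no empty path components before the
// label is split. As a worked example of what it does (helper is hypothetical,
// for illustration; Regex is already in scope here):
private static string NormalizePathSketch(string s)
{
    s = Regex.Replace(s, "/+", "/"); // "a//b///c" -> "a/b/c"
    return s.StartsWith("/", StringComparison.Ordinal) ? s.Substring(1) : s;
}
// NormalizePathSketch("//foo//bar/") == "foo/bar/"; the trailing empty
// component is later dropped by StringSplitOptions.RemoveEmptyEntries.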
public virtual void TestParseInt() { int test; try { test = ArrayUtil.ParseInt32("".ToCharArray()); Assert.IsTrue(false); } #pragma warning disable 168 catch (FormatException e) #pragma warning restore 168 { //expected } try { test = ArrayUtil.ParseInt32("foo".ToCharArray()); Assert.IsTrue(false); } #pragma warning disable 168 catch (FormatException e) #pragma warning restore 168 { //expected } try { test = ArrayUtil.ParseInt32(Convert.ToString(long.MaxValue).ToCharArray()); Assert.IsTrue(false); } #pragma warning disable 168 catch (FormatException e) #pragma warning restore 168 { //expected } try { test = ArrayUtil.ParseInt32("0.34".ToCharArray()); Assert.IsTrue(false); } #pragma warning disable 168 catch (FormatException e) #pragma warning restore 168 { //expected } try { test = ArrayUtil.ParseInt32("1".ToCharArray()); Assert.IsTrue(test == 1, test + " does not equal: " + 1); test = ArrayUtil.ParseInt32("-10000".ToCharArray()); Assert.IsTrue(test == -10000, test + " does not equal: " + -10000); test = ArrayUtil.ParseInt32("1923".ToCharArray()); Assert.IsTrue(test == 1923, test + " does not equal: " + 1923); test = ArrayUtil.ParseInt32("-1".ToCharArray()); Assert.IsTrue(test == -1, test + " does not equal: " + -1); test = ArrayUtil.ParseInt32("foo 1923 bar".ToCharArray(), 4, 4); Assert.IsTrue(test == 1923, test + " does not equal: " + 1923); } catch (FormatException e) { Console.WriteLine(e.ToString()); Console.Write(e.StackTrace); Assert.IsTrue(false); } }
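// ParseInt32(char[], offset, len) parses a slice of the buffer without
// allocating a substring; the last positive case above is equivalent to:
private static void ParseSliceSketch()
{
    char[] chars = "foo 1923 bar".ToCharArray();
    int value = ArrayUtil.ParseInt32(chars, 4, 4); // chars[4..8) == "1923"
    Assert.AreEqual(1923, value);
}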
public virtual void TestRandom() { string[] tokens = GetRandomTokens(10); Store.Directory indexDir = NewDirectory(); Store.Directory taxoDir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), indexDir, Similarity, TimeZone); var tw = new DirectoryTaxonomyWriter(taxoDir); FacetsConfig config = new FacetsConfig(); int numDocs = AtLeast(1000); int numDims = TestUtil.NextInt(Random(), 1, 7); IList <TestDoc> testDocs = GetRandomDocs(tokens, numDocs, numDims); foreach (TestDoc testDoc in testDocs) { Document doc = new Document(); doc.Add(NewStringField("content", testDoc.content, Field.Store.NO)); for (int j = 0; j < numDims; j++) { if (testDoc.dims[j] != null) { doc.Add(new FacetField("dim" + j, testDoc.dims[j])); } } w.AddDocument(config.Build(tw, doc)); } // NRT open IndexSearcher searcher = NewSearcher(w.Reader); // NRT open var tr = new DirectoryTaxonomyReader(tw); int iters = AtLeast(100); for (int iter = 0; iter < iters; iter++) { string searchToken = tokens[Random().Next(tokens.Length)]; if (VERBOSE) { Console.WriteLine("\nTEST: iter content=" + searchToken); } FacetsCollector fc = new FacetsCollector(); FacetsCollector.Search(searcher, new TermQuery(new Term("content", searchToken)), 10, fc); Facets facets = GetTaxonomyFacetCounts(tr, config, fc); // Slow, yet hopefully bug-free, faceting: var expectedCounts = new List <Dictionary <string, int?> >(); for (int i = 0; i < numDims; i++) { expectedCounts.Add(new Dictionary <string, int?>()); } foreach (TestDoc doc in testDocs) { if (doc.content.Equals(searchToken)) { for (int j = 0; j < numDims; j++) { if (doc.dims[j] != null) { int?v = expectedCounts[j].ContainsKey(doc.dims[j]) ? expectedCounts[j][doc.dims[j]] : null; if (v == null) { expectedCounts[j][doc.dims[j]] = 1; } else { expectedCounts[j][doc.dims[j]] = (int)v + 1; } } } } } List <FacetResult> expected = new List <FacetResult>(); for (int i = 0; i < numDims; i++) { List <LabelAndValue> labelValues = new List <LabelAndValue>(); int totCount = 0; foreach (KeyValuePair <string, int?> ent in expectedCounts[i]) { labelValues.Add(new LabelAndValue(ent.Key, ent.Value.Value)); totCount += ent.Value.Value; } SortLabelValues(labelValues); if (totCount > 0) { expected.Add(new FacetResult("dim" + i, new string[0], totCount, labelValues.ToArray(), labelValues.Count)); } } // Sort by highest value, tie break by value: SortFacetResults(expected); IList <FacetResult> actual = facets.GetAllDims(10); // Messy: fixup ties SortTies(actual); Assert.AreEqual(expected, actual); } IOUtils.Dispose(w, tw, searcher.IndexReader, tr, indexDir, taxoDir); }
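// The "slow, yet hopefully bug-free" tally above is a per-dimension histogram
// over nullable ints. The same increment written with TryGetValue instead of
// the ContainsKey/nullable dance (sketch, same TestDoc shape assumed):
private static void IncrementSketch(Dictionary<string, int> counts, string dimValue)
{
    if (dimValue == null) return;            // dimension absent on this doc
    counts.TryGetValue(dimValue, out int c); // c == 0 when the key is new
    counts[dimValue] = c + 1;
}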
public virtual void Test() { IList <string> postingsList = new List <string>(); int numTerms = AtLeast(300); int maxTermsPerDoc = TestUtil.NextInt32(Random, 10, 20); bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"), StringComparison.Ordinal); IndexWriterConfig iwc = NewIndexWriterConfig(Random, TEST_VERSION_CURRENT, new MockAnalyzer(Random)); if ((isSimpleText || iwc.MergePolicy is MockRandomMergePolicy) && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1)) { // Otherwise test can take way too long (> 2 hours) numTerms /= 2; } if (VERBOSE) { Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc); Console.WriteLine("numTerms=" + numTerms); } for (int i = 0; i < numTerms; i++) { string term = Convert.ToString(i); for (int j = 0; j < i; j++) { postingsList.Add(term); } } postingsList.Shuffle(); ConcurrentQueue <string> postings = new ConcurrentQueue <string>(postingsList); Directory dir = NewFSDirectory(CreateTempDir("bagofpostings")); RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc); int threadCount = TestUtil.NextInt32(Random, 1, 5); if (VERBOSE) { Console.WriteLine("config: " + iw.IndexWriter.Config); Console.WriteLine("threadCount=" + threadCount); } ThreadJob[] threads = new ThreadJob[threadCount]; CountdownEvent startingGun = new CountdownEvent(1); for (int threadID = 0; threadID < threadCount; threadID++) { threads[threadID] = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun); threads[threadID].Start(); } startingGun.Signal(); foreach (ThreadJob t in threads) { t.Join(); } iw.ForceMerge(1); DirectoryReader ir = iw.GetReader(); Assert.AreEqual(1, ir.Leaves.Count); AtomicReader air = (AtomicReader)ir.Leaves[0].Reader; Terms terms = air.GetTerms("field"); // numTerms-1 because there cannot be a term 0 with 0 postings: #pragma warning disable 612, 618 Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount); if (iwc.Codec is Lucene3xCodec == false) #pragma warning restore 612, 618 { Assert.AreEqual(numTerms - 1, terms.Count); } TermsEnum termsEnum = terms.GetIterator(null); BytesRef term_; while ((term_ = termsEnum.Next()) != null) { int value = Convert.ToInt32(term_.Utf8ToString()); Assert.AreEqual(value, termsEnum.DocFreq); // don't really need to check more than this, as CheckIndex // will verify that docFreq == actual number of documents seen // from a docsAndPositionsEnum. } ir.Dispose(); iw.Dispose(); dir.Dispose(); }
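// Each indexing thread drains the shared queue, putting at most one copy of a
// term into any single document; that is what makes docFreq(term i) == i, since
// term i was enqueued exactly i times. A hedged sketch of the per-document loop
// inside the worker (the real helper re-enqueues a duplicate instead of
// dropping it, as noted below):
private static string NextDocTextSketch(ConcurrentQueue<string> postings, int maxTermsPerDoc)
{
    var visited = new HashSet<string>();
    var text = new StringBuilder();
    while (visited.Count < maxTermsPerDoc && postings.TryDequeue(out string token))
    {
        if (!visited.Add(token)) break; // already used in this doc; real test puts it back
        text.Append(' ').Append(token);
    }
    return text.ToString(); // indexed into "field", then the loop repeats
}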
public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulerFactories), "Values")] Func <IConcurrentMergeScheduler> newScheduler) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())) .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) .SetRAMBufferSizeMB(256.0) .SetMergeScheduler(newScheduler()) .SetMergePolicy(NewLogMergePolicy(false, 10)) .SetOpenMode(OpenMode.CREATE)); Document doc = new Document(); var bytes = new byte[2]; BytesRef data = new BytesRef(bytes); SortedDocValuesField dvField = new SortedDocValuesField("dv", data); doc.Add(dvField); for (int i = 0; i < int.MaxValue; i++) { bytes[0] = (byte)(i >> 8); bytes[1] = (byte)i; w.AddDocument(doc); if (i % 100000 == 0) { Console.WriteLine("indexed: " + i); Console.Out.Flush(); } } w.ForceMerge(1); w.Dispose(); Console.WriteLine("verifying..."); Console.Out.Flush(); DirectoryReader r = DirectoryReader.Open(dir); int expectedValue = 0; foreach (AtomicReaderContext context in r.Leaves) { AtomicReader reader = context.AtomicReader; BytesRef scratch = new BytesRef(); BinaryDocValues dv = reader.GetSortedDocValues("dv"); for (int i = 0; i < reader.MaxDoc; i++) { bytes[0] = (byte)(expectedValue >> 8); bytes[1] = (byte)expectedValue; dv.Get(i, scratch); Assert.AreEqual(data, scratch); expectedValue++; } } r.Dispose(); dir.Dispose(); }
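// Each document's value is its ordinal truncated to 16 bits, so the sorted
// doc-values field holds at most 65536 distinct byte pairs even across ~2B
// docs. The packing used above and its inverse, as a worked example:
private static void PackSketch()
{
    int i = 0x1234C0DE;
    var packed = new byte[2];
    packed[0] = (byte)(i >> 8); // high byte of the low 16 bits: 0xC0
    packed[1] = (byte)i;        // low byte: 0xDE
    int roundTrip = ((packed[0] & 0xFF) << 8) | (packed[1] & 0xFF);
    Assert.AreEqual(0xC0DE, roundTrip);
}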
/// <summary> /// Entry point to the Compile application. /// <para/> /// This program takes any number of arguments: the first is the name of the /// desired stemming algorithm to use (a list is available in the package /// description), all of the rest should be the path or paths to a file or /// files containing a stemmer table to compile. /// </summary> /// <param name="args">the command line arguments</param> public static void Main(string[] args) { if (args.Length < 1) { return; } // LUCENENET NOTE: This line does nothing in .NET // and also does nothing in Java...what? //args[0].ToUpperInvariant(); // Reads the first char of the first arg backward = args[0][0] == '-'; int qq = (backward) ? 1 : 0; bool storeorig = false; if (args[0][qq] == '0') { storeorig = true; qq++; } multi = args[0][qq] == 'M'; if (multi) { qq++; } string charset = SystemProperties.GetProperty("egothor.stemmer.charset", "UTF-8"); var stemmerTables = new List <string>(); // LUCENENET specific // command line argument overrides environment variable or default, if supplied for (int i = 1; i < args.Length; i++) { if ("-e".Equals(args[i]) || "--encoding".Equals(args[i])) { charset = args[++i]; // the charset value follows the flag; assigning args[i] itself would set charset to "-e" } else { stemmerTables.Add(args[i]); } } char[] optimizer = new char[args[0].Length - qq]; for (int i = 0; i < optimizer.Length; i++) { optimizer[i] = args[0][qq + i]; } foreach (var stemmerTable in stemmerTables) { // System.out.println("[" + args[i] + "]"); Diff diff = new Diff(); //int stems = 0; // not used int words = 0; AllocTrie(); Console.WriteLine(stemmerTable); using (TextReader input = new StreamReader( new FileStream(stemmerTable, FileMode.Open, FileAccess.Read), Encoding.GetEncoding(charset))) { string line; while ((line = input.ReadLine()) != null) { try { line = line.ToLowerInvariant(); StringTokenizer st = new StringTokenizer(line); string stem = st.NextToken(); if (storeorig) { trie.Add(stem, "-a"); words++; } while (st.HasMoreTokens()) { string token = st.NextToken(); if (token.Equals(stem) == false) { trie.Add(token, diff.Exec(token, stem)); words++; } } } catch (InvalidOperationException /*x*/) { // no base token (stem) on a line } } } Optimizer o = new Optimizer(); Optimizer2 o2 = new Optimizer2(); Lift l = new Lift(true); Lift e = new Lift(false); Gener g = new Gener(); for (int j = 0; j < optimizer.Length; j++) { string prefix; switch (optimizer[j]) { case 'G': trie = trie.Reduce(g); prefix = "G: "; break; case 'L': trie = trie.Reduce(l); prefix = "L: "; break; case 'E': trie = trie.Reduce(e); prefix = "E: "; break; case '2': trie = trie.Reduce(o2); prefix = "2: "; break; case '1': trie = trie.Reduce(o); prefix = "1: "; break; default: continue; } trie.PrintInfo(Console.Out, prefix + " "); } using (DataOutputStream os = new DataOutputStream( new FileStream(stemmerTable + ".out", FileMode.OpenOrCreate, FileAccess.Write))) { os.WriteUTF(args[0]); trie.Store(os); } } }
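// A stemmer-table line is "stem form1 form2 ...": the stem comes first, then
// the surface forms that should reduce to it. For every form, the loop above
// stores the Diff command string that rewrites the form back into its stem.
// A worked example of one line, using string.Split in place of the
// StringTokenizer loop (sketch; Trie and Diff come from this class):
private static void AddLineSketch(Trie trie, Diff diff)
{
    string[] tokens = "run running runs".Split(new[] { ' ', '\t' },
        StringSplitOptions.RemoveEmptyEntries);
    string stem = tokens[0];
    for (int i = 1; i < tokens.Length; i++)
    {
        if (!tokens[i].Equals(stem, StringComparison.Ordinal))
        {
            // e.g. maps "running" to the edit commands that produce "run"
            trie.Add(tokens[i], diff.Exec(tokens[i], stem));
        }
    }
}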
public virtual void TestRandomDoubles() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random, dir, Similarity, TimeZone); int numDocs = AtLeast(1000); double[] values = new double[numDocs]; double minValue = double.PositiveInfinity; double maxValue = double.NegativeInfinity; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); double v = Random.NextDouble(); values[i] = v; doc.Add(new DoubleDocValuesField("field", v)); doc.Add(new DoubleField("field", v, Field.Store.NO)); w.AddDocument(doc); minValue = Math.Min(minValue, v); maxValue = Math.Max(maxValue, v); } IndexReader r = w.GetReader(); IndexSearcher s = NewSearcher(r); FacetsConfig config = new FacetsConfig(); int numIters = AtLeast(10); for (int iter = 0; iter < numIters; iter++) { if (VERBOSE) { Console.WriteLine("TEST: iter=" + iter); } int numRange = TestUtil.NextInt32(Random, 1, 5); DoubleRange[] ranges = new DoubleRange[numRange]; int[] expectedCounts = new int[numRange]; double minAcceptedValue = double.PositiveInfinity; double maxAcceptedValue = double.NegativeInfinity; for (int rangeID = 0; rangeID < numRange; rangeID++) { double min; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { min = prevRange.Min; } else { min = prevRange.Max; } } else { min = Random.NextDouble(); } double max; if (rangeID > 0 && Random.Next(10) == 7) { // Use an existing boundary: DoubleRange prevRange = ranges[Random.Next(rangeID)]; if (Random.NextBoolean()) { max = prevRange.Min; } else { max = prevRange.Max; } } else { max = Random.NextDouble(); } if (min > max) { double x = min; min = max; max = x; } bool minIncl; bool maxIncl; if (min == max) { minIncl = true; maxIncl = true; } else { minIncl = Random.NextBoolean(); maxIncl = Random.NextBoolean(); } ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl); // Do "slow but hopefully correct" computation of // expected count: for (int i = 0; i < numDocs; i++) { bool accept = true; if (minIncl) { accept &= values[i] >= min; } else { accept &= values[i] > min; } if (maxIncl) { accept &= values[i] <= max; } else { accept &= values[i] < max; } if (accept) { expectedCounts[rangeID]++; minAcceptedValue = Math.Min(minAcceptedValue, values[i]); maxAcceptedValue = Math.Max(maxAcceptedValue, values[i]); } } } FacetsCollector sfc = new FacetsCollector(); s.Search(new MatchAllDocsQuery(), sfc); Filter fastMatchFilter; if (Random.NextBoolean()) { if (Random.NextBoolean()) { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minValue, maxValue, true, true); } else { fastMatchFilter = NumericRangeFilter.NewDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true); } } else { fastMatchFilter = null; } ValueSource vs = new DoubleFieldSource("field"); Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges); FacetResult result = facets.GetTopChildren(10, "field"); Assert.AreEqual(numRange, result.LabelValues.Length); for (int rangeID = 0; rangeID < numRange; rangeID++) { if (VERBOSE) { Console.WriteLine(" range " + rangeID + " expectedCount=" + expectedCounts[rangeID]); } LabelAndValue subNode = result.LabelValues[rangeID]; Assert.AreEqual("r" + rangeID, subNode.Label); Assert.AreEqual(expectedCounts[rangeID], (int)subNode.Value); DoubleRange range = ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(config); if (Random.NextBoolean()) { if (Random.NextBoolean()) { 
ddq.Add("field", NumericRangeFilter.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } else { ddq.Add("field", NumericRangeQuery.NewDoubleRange("field", range.Min, range.Max, range.MinInclusive, range.MaxInclusive)); } } else { ddq.Add("field", range.GetFilter(fastMatchFilter, vs)); } Assert.AreEqual(expectedCounts[rangeID], s.Search(ddq, 10).TotalHits); } } IOUtils.Dispose(w, r, dir); }