private string[] InitTasksPackages(Config config)
{
    // LUCENENET specific - changing the logic a bit
    // to add all referenced assemblies by default.
    // The alt.tasks.packages parameter still exists, but
    // it is only necessary for assemblies that are not
    // referenced by the host assembly.
    ISet<string> result = new JCG.HashSet<string>();
    string alts = config.Get("alt.tasks.packages", null);
    string dfltPkg = typeof(PerfTask).Assembly.GetName().Name;
    IEnumerable<string> referencedAssemblies = AssemblyUtils.GetReferencedAssemblies().Select(a => a.GetName().Name);
    result.Add(dfltPkg);

    if (alts == null)
    {
        result.UnionWith(referencedAssemblies);
        return result.ToArray();
    }

    // Drop empty entries so a trailing comma in the config value is harmless
    foreach (string alt in alts.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries))
    {
        result.Add(alt);
    }
    result.UnionWith(referencedAssemblies);
    return result.ToArray();
}
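// Illustrative only: the property name is real, but the assembly name below is
// made up. In a benchmark config/.alg file, alt.tasks.packages now only needs
// to name assemblies the host assembly does not already reference, e.g.:
//
//   alt.tasks.packages = My.Custom.BenchmarkTasks
//
// The PerfTask host assembly and all referenced assemblies are always included
// in the returned list, which is presumably what task-name resolution scans.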
private void TestRandomWords(int maxNumWords, int numIter)
{
    Random random = new Random(Random.Next());
    for (int iter = 0; iter < numIter; iter++)
    {
        if (Verbose)
        {
            Console.WriteLine("\nTEST: iter " + iter);
        }
        for (int inputMode = 0; inputMode < 2; inputMode++)
        {
            int numWords = random.Next(maxNumWords + 1);
            ISet<Int32sRef> termsSet = new JCG.HashSet<Int32sRef>();
            Int32sRef[] terms = new Int32sRef[numWords];
            while (termsSet.Count < numWords)
            {
                string term = FSTTester<object>.GetRandomString(random);
                termsSet.Add(FSTTester<object>.ToInt32sRef(term, inputMode));
            }
            DoTest(inputMode, termsSet.ToArray());
        }
    }
}
public override string[] ListAll()
{
    lock (this)
    {
        ISet<string> files = new JCG.HashSet<string>();
        foreach (string f in cache.ListAll())
        {
            files.Add(f);
        }
        // LUCENE-1468: our NRTCachingDirectory will actually exist (RAMDir!),
        // but if the underlying delegate is an FSDir and mkdirs() has not
        // yet been called, because so far everything is a cached write,
        // in this case, we don't want to throw a NoSuchDirectoryException
        try
        {
            foreach (string f in @delegate.ListAll())
            {
                // Cannot do this -- if lucene calls createOutput but
                // file already exists then this falsely trips:
                //assert !files.contains(f): "file \"" + f + "\" is in both dirs";
                files.Add(f);
            }
        }
        catch (Exception ex) when (ex.IsNoSuchDirectoryException())
        {
            // however, if there are no cached files, then the directory truly
            // does not "exist"
            if (files.Count == 0)
            {
                throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
            }
        }
        return files.ToArray();
    }
}
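// A minimal wiring sketch (path and size limits are illustrative) for the
// directory whose ListAll appears above: NRTCachingDirectory caches small,
// short-lived writes in RAM in front of a delegate Directory, which is why
// ListAll must merge the cache's files with the delegate's.
private static Directory OpenNrtCachingDirectorySketch()
{
    FSDirectory fsDir = FSDirectory.Open(new DirectoryInfo("/tmp/index"));
    return new NRTCachingDirectory(fsDir, 5.0 /* maxMergeSizeMB */, 60.0 /* maxCachedMB */);
}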
/// <summary>Extracts all term texts of a given <see cref="Query"/> into an array of WeightedTerms</summary>
/// <param name="query"><see cref="Query"/> to extract term texts from</param>
/// <param name="prohibited"><c>true</c> to extract "prohibited" terms, too</param>
/// <param name="fieldName">The fieldName used to filter query terms</param>
/// <returns>an array of the terms used in a query, plus their weights.</returns>
public static WeightedTerm[] GetTerms(Query query, bool prohibited, string fieldName)
{
    var terms = new JCG.HashSet<WeightedTerm>();
    if (fieldName != null)
    {
        fieldName = fieldName.Intern();
    }
    GetTerms(query, terms, prohibited, fieldName);
    return terms.ToArray();
}
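// A minimal usage sketch for GetTerms above. The field name and query are
// illustrative; the extractor is assumed to be QueryTermExtractor from the
// Highlighter package, and WeightedTerm is assumed to expose Term/Weight
// properties in the usual Lucene.NET style.
private static void GetTermsUsageSketch()
{
    Query q = new TermQuery(new Term("contents", "lucene"));
    WeightedTerm[] weighted = QueryTermExtractor.GetTerms(q, false, "contents");
    foreach (WeightedTerm wt in weighted)
    {
        Console.WriteLine(wt.Term + " weight=" + wt.Weight);
    }
}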
public override string[] ListAll()
{
    ISet<string> files = new JCG.HashSet<string>();
    // LUCENE-3380: either or both of our dirs could be FSDirs,
    // but if one underlying delegate is an FSDir and mkdirs() has not
    // yet been called, because so far everything is written to the other,
    // in this case, we don't want to throw a NoSuchDirectoryException
    DirectoryNotFoundException exc = null;
    try
    {
        foreach (string f in primaryDir.ListAll())
        {
            files.Add(f);
        }
    }
    catch (DirectoryNotFoundException e)
    {
        exc = e;
    }
    try
    {
        foreach (string f in secondaryDir.ListAll())
        {
            files.Add(f);
        }
    }
    catch (DirectoryNotFoundException /*e*/)
    {
        // we got NoSuchDirectoryException from both dirs:
        // rethrow the first.
        if (exc != null)
        {
            throw exc;
        }
        // we got NoSuchDirectoryException from the secondary,
        // and the primary is empty.
        if (files.Count == 0)
        {
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
    }
    // we got NoSuchDirectoryException from the primary,
    // and the secondary is empty.
    if (exc != null && files.Count == 0)
    {
        throw exc;
    }
    return files.ToArray();
}
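// A minimal wiring sketch (extensions and paths are illustrative) for the two
// directories merged above: FileSwitchDirectory routes files whose extension
// is in primaryExtensions to the primary Directory and everything else to the
// secondary, so ListAll must union both listings.
private static Directory OpenFileSwitchDirectorySketch()
{
    ISet<string> primaryExtensions = new JCG.HashSet<string> { "nvd", "nvm" };
    Directory primary = FSDirectory.Open(new DirectoryInfo("/tmp/primary"));
    Directory secondary = FSDirectory.Open(new DirectoryInfo("/tmp/secondary"));
    return new FileSwitchDirectory(primaryExtensions, primary, secondary, true /* doClose */);
}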
public void TestRandom()
{
    int numTerms = AtLeast(400);
    if (VERBOSE)
    {
        Console.WriteLine("Testing with {0} terms", numTerms);
    }
    ISet<string> terms = new JCG.HashSet<string>();
    while (terms.Count < numTerms)
    {
        terms.Add(RandomString());
    }
    TestTerms(terms.ToArray());
}
public virtual void TestAllocateAndFree()
{
    RecyclingInt32BlockAllocator allocator = NewAllocator();
    ISet<int[]> allocated = new JCG.HashSet<int[]>();
    int freeButAllocated = 0;
    int[] block = allocator.GetInt32Block();
    allocated.Add(block);
    Assert.IsNotNull(block);
    int size = block.Length;

    int numIters = AtLeast(97);
    for (int i = 0; i < numIters; i++)
    {
        int num = 1 + Random.Next(39);
        for (int j = 0; j < num; j++)
        {
            block = allocator.GetInt32Block();
            freeButAllocated = Math.Max(0, freeButAllocated - 1);
            Assert.IsNotNull(block);
            Assert.AreEqual(size, block.Length);
            Assert.IsTrue(allocated.Add(block), "block is returned twice");
            Assert.AreEqual(4 * size * (allocated.Count + allocator.NumBufferedBlocks), allocator.BytesUsed,
                "" + (4 * size * (allocated.Count + allocator.NumBufferedBlocks) - allocator.BytesUsed));
        }
        int[][] array = allocated.ToArray(/*new int[0][]*/);
        int begin = Random.Next(array.Length);
        int end = begin + Random.Next(array.Length - begin);
        for (int j = begin; j < end; j++)
        {
            int[] b = array[j];
            Assert.IsTrue(allocated.Remove(b));
        }
        allocator.RecycleInt32Blocks(array, begin, end);
        for (int j = begin; j < end; j++)
        {
            Assert.IsNull(array[j]);
        }
        // randomly free blocks
        int numFreeBlocks = allocator.NumBufferedBlocks;
        int freeBlocks = allocator.FreeBlocks(Random.Next(7 + allocator.MaxBufferedBlocks));
        Assert.AreEqual(allocator.NumBufferedBlocks, numFreeBlocks - freeBlocks);
    }
}
protected internal RandomDocumentFactory(BaseTermVectorsFormatTestCase baseTermVectorsFormatTestCase, int distinctFieldNames, int distinctTerms)
{
    this.outerInstance = baseTermVectorsFormatTestCase;
    ISet<string> fieldNames = new JCG.HashSet<string>();
    while (fieldNames.Count < distinctFieldNames)
    {
        fieldNames.Add(TestUtil.RandomSimpleString(Random));
        fieldNames.Remove("id");
    }
    this.fieldNames = fieldNames.ToArray(/*new string[0]*/);
    terms = new string[distinctTerms];
    termBytes = new BytesRef[distinctTerms];
    for (int i = 0; i < distinctTerms; ++i)
    {
        terms[i] = TestUtil.RandomRealisticUnicodeString(Random);
        termBytes[i] = new BytesRef(terms[i]);
    }
}
public virtual void TestAllocateAndRecycle()
{
    RecyclingByteBlockAllocator allocator = NewAllocator();
    var allocated = new JCG.HashSet<byte[]>();

    var block = allocator.GetByteBlock();
    allocated.Add(block);
    Assert.IsNotNull(block);
    int size = block.Length;

    int numIters = AtLeast(97);
    for (int i = 0; i < numIters; i++)
    {
        int num = 1 + Random.Next(39);
        for (int j = 0; j < num; j++)
        {
            block = allocator.GetByteBlock();
            Assert.IsNotNull(block);
            Assert.AreEqual(size, block.Length);
            Assert.IsTrue(allocated.Add(block), "block is returned twice");
            Assert.AreEqual(size * (allocated.Count + allocator.NumBufferedBlocks), allocator.BytesUsed);
        }
        var array = allocated.ToArray();
        int begin = Random.Next(array.Length);
        int end = begin + Random.Next(array.Length - begin);
        var selected = new List<byte[]>();
        for (int j = begin; j < end; j++)
        {
            selected.Add(array[j]);
        }
        allocator.RecycleByteBlocks(array, begin, end);
        for (int j = begin; j < end; j++)
        {
            Assert.IsNull(array[j]);
            var b = selected[0];
            selected.RemoveAt(0);
            Assert.IsTrue(allocated.Remove(b));
        }
    }
}
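// The recycle contract exercised above, in miniature. Assumes the default
// RecyclingByteBlockAllocator constructor; RecycleByteBlocks buffers the
// given slots for reuse and nulls them out, which is what the test asserts.
private static void RecycleSketch()
{
    var alloc = new RecyclingByteBlockAllocator();
    byte[][] blocks = new byte[][] { alloc.GetByteBlock() };
    alloc.RecycleByteBlocks(blocks, 0, 1);
    // blocks[0] is now null; the allocator may hand the buffer back on the
    // next GetByteBlock() call instead of allocating a fresh one.
}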
public static void Main(string[] args)
{
    if (args.Length < 4 || args.Length > 5)
    {
        // LUCENENET specific - our wrapper console shows correct usage
        throw new ArgumentException();
        //Console.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
        //Console.Error.WriteLine("topicsFile: input file containing queries");
        //Console.Error.WriteLine("qrelsFile: input file containing relevance judgements");
        //Console.Error.WriteLine("submissionFile: output submission file for trec_eval");
        //Console.Error.WriteLine("indexDir: index directory");
        //Console.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
        //Console.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
        //Environment.Exit(1);
    }

    FileInfo topicsFile = new FileInfo(args[0]);
    FileInfo qrelsFile = new FileInfo(args[1]);
    SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");
    using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3]));
    using IndexReader reader = DirectoryReader.Open(dir);
    string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
    IndexSearcher searcher = new IndexSearcher(reader);

    int maxResults = 1000;
    string docNameField = "docname";

    TextWriter logger = Console.Out; //new StreamWriter(Console, Encoding.GetEncoding(0));

    // use trec utilities to read trec topics into quality queries
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

    // prepare judge, with trec utilities that read from a QRels file
    IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

    // validate topics & judgments match each other
    judge.ValidateData(qqs, logger);

    ISet<string> fieldSet = new JCG.HashSet<string>();
    if (fieldSpec.IndexOf('T') >= 0)
    {
        fieldSet.Add("title");
    }
    if (fieldSpec.IndexOf('D') >= 0)
    {
        fieldSet.Add("description");
    }
    if (fieldSpec.IndexOf('N') >= 0)
    {
        fieldSet.Add("narrative");
    }

    // set the parsing of quality queries into Lucene queries.
    IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

    // run the benchmark
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    qrun.MaxResults = maxResults;
    QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

    // print an average sum of the results
    QualityStats avg = QualityStats.Average(stats);
    avg.Log("SUMMARY", 2, logger, " ");
}
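// Example invocation (file names are illustrative), mirroring the usage text
// in the comments above:
//
//   QueryDriver topics.txt qrels.txt submission.txt ./index TD
//
// "TD" queries on Title + Description; omitting the fifth argument falls back
// to the title-only spec ("T").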
public virtual void TestRandomWithPrefix()
{
    Directory dir = NewDirectory();

    ISet<string> prefixes = new JCG.HashSet<string>();
    int numPrefix = TestUtil.NextInt32(Random, 2, 7);
    if (VERBOSE)
    {
        Console.WriteLine("TEST: use " + numPrefix + " prefixes");
    }
    while (prefixes.Count < numPrefix)
    {
        prefixes.Add(TestUtil.RandomRealisticUnicodeString(Random));
        //prefixes.Add(TestUtil.RandomSimpleString(random));
    }
    string[] prefixesArray = prefixes.ToArray(/*new string[prefixes.Count]*/);

    int NUM_TERMS = AtLeast(20);
    ISet<BytesRef> terms = new JCG.HashSet<BytesRef>();
    while (terms.Count < NUM_TERMS)
    {
        string s = prefixesArray[Random.Next(prefixesArray.Length)] + TestUtil.RandomRealisticUnicodeString(Random);
        //final String s = prefixesArray[random.nextInt(prefixesArray.Length)] + TestUtil.RandomSimpleString(random);
        if (s.Length > 0)
        {
            terms.Add(new BytesRef(s));
        }
    }
    BytesRef[] termsArray = terms.ToArray();
    Array.Sort(termsArray);

    int NUM_DOCS = AtLeast(100);

    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    // Sometimes swap in codec that impls ord():
    if (Random.Next(10) == 7)
    {
        Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
        conf.SetCodec(codec);
    }

    RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

    int[][] idToOrds = new int[NUM_DOCS][];
    ISet<int?> ordsForDocSet = new JCG.HashSet<int?>();

    for (int id = 0; id < NUM_DOCS; id++)
    {
        Document doc = new Document();

        doc.Add(new Int32Field("id", id, Field.Store.NO));

        int termCount = TestUtil.NextInt32(Random, 0, 20 * RANDOM_MULTIPLIER);
        while (ordsForDocSet.Count < termCount)
        {
            ordsForDocSet.Add(Random.Next(termsArray.Length));
        }
        int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: doc id=" + id);
        }
        foreach (int ord in ordsForDocSet)
        {
            ordsForDoc[upto++] = ord;
            Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
            if (VERBOSE)
            {
                Console.WriteLine(" f=" + termsArray[ord].Utf8ToString());
            }
            doc.Add(field);
        }
        ordsForDocSet.Clear();
        Array.Sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;
        w.AddDocument(doc);
    }

    DirectoryReader r = w.GetReader();
    w.Dispose();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: reader=" + r);
    }

    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);
    foreach (string prefix in prefixesArray)
    {
        BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

        int[][] idToOrdsPrefix = new int[NUM_DOCS][];
        for (int id = 0; id < NUM_DOCS; id++)
        {
            int[] docOrds = idToOrds[id];
            IList<int?> newOrds = new List<int?>();
            foreach (int ord in idToOrds[id])
            {
                if (StringHelper.StartsWith(termsArray[ord], prefixRef))
                {
                    newOrds.Add(ord);
                }
            }
            int[] newOrdsArray = new int[newOrds.Count];
            int upto = 0;
            foreach (int ord in newOrds)
            {
                newOrdsArray[upto++] = ord;
            }
            idToOrdsPrefix[id] = newOrdsArray;
        }

        foreach (AtomicReaderContext ctx in r.Leaves)
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: sub=" + ctx.Reader);
            }
            Verify((AtomicReader)ctx.Reader, idToOrdsPrefix, termsArray, prefixRef);
        }

        // Also test top-level reader: its enum does not support
        // ord, so this forces the OrdWrapper to run:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: top reader");
        }
        Verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }

    FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

    r.Dispose();
    dir.Dispose();
}
public virtual void TestRandom()
{
    Directory dir = NewDirectory();

    int NUM_TERMS = AtLeast(20);
    ISet<BytesRef> terms = new JCG.HashSet<BytesRef>();
    while (terms.Count < NUM_TERMS)
    {
        string s = TestUtil.RandomRealisticUnicodeString(Random);
        //final String s = TestUtil.RandomSimpleString(random);
        if (s.Length > 0)
        {
            terms.Add(new BytesRef(s));
        }
    }
    BytesRef[] termsArray = terms.ToArray(/*new BytesRef[terms.Count]*/);
    Array.Sort(termsArray);

    int NUM_DOCS = AtLeast(100);

    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

    // Sometimes swap in codec that impls ord():
    if (Random.Next(10) == 7)
    {
        // Make sure terms index has ords:
        Codec codec = TestUtil.AlwaysPostingsFormat(PostingsFormat.ForName("Lucene41WithOrds"));
        conf.SetCodec(codec);
    }

    RandomIndexWriter w = new RandomIndexWriter(Random, dir, conf);

    int[][] idToOrds = new int[NUM_DOCS][];
    ISet<int?> ordsForDocSet = new JCG.HashSet<int?>();

    for (int id = 0; id < NUM_DOCS; id++)
    {
        Document doc = new Document();

        doc.Add(new Int32Field("id", id, Field.Store.NO));

        int termCount = TestUtil.NextInt32(Random, 0, 20 * RandomMultiplier);
        while (ordsForDocSet.Count < termCount)
        {
            ordsForDocSet.Add(Random.Next(termsArray.Length));
        }
        int[] ordsForDoc = new int[termCount];
        int upto = 0;
        if (Verbose)
        {
            Console.WriteLine("TEST: doc id=" + id);
        }
        foreach (int ord in ordsForDocSet)
        {
            ordsForDoc[upto++] = ord;
            Field field = NewStringField("field", termsArray[ord].Utf8ToString(), Field.Store.NO);
            if (Verbose)
            {
                Console.WriteLine(" f=" + termsArray[ord].Utf8ToString());
            }
            doc.Add(field);
        }
        ordsForDocSet.Clear();
        Array.Sort(ordsForDoc);
        idToOrds[id] = ordsForDoc;
        w.AddDocument(doc);
    }

    DirectoryReader r = w.GetReader();
    w.Dispose();

    if (Verbose)
    {
        Console.WriteLine("TEST: reader=" + r);
    }

    foreach (AtomicReaderContext ctx in r.Leaves)
    {
        if (Verbose)
        {
            Console.WriteLine("\nTEST: sub=" + ctx.Reader);
        }
        Verify((AtomicReader)ctx.Reader, idToOrds, termsArray, null);
    }

    // Also test top-level reader: its enum does not support
    // ord, so this forces the OrdWrapper to run:
    if (Verbose)
    {
        Console.WriteLine("TEST: top reader");
    }
    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(r);
    Verify(slowR, idToOrds, termsArray, null);

    FieldCache.DEFAULT.PurgeByCacheKey(slowR.CoreCacheKey);

    r.Dispose();
    dir.Dispose();
}
protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms terms)
{
    Assert.AreEqual(1, terms.DocCount);
    int termCount = new JCG.HashSet<string>(tk.terms).Count;
    Assert.AreEqual((long)termCount, terms.Count); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual((long)termCount, terms.SumDocFreq); // LUCENENET specific - cast required because types don't match (xUnit checks this)
    Assert.AreEqual(ft.StoreTermVectorPositions, terms.HasPositions);
    Assert.AreEqual(ft.StoreTermVectorOffsets, terms.HasOffsets);
    Assert.AreEqual(ft.StoreTermVectorPayloads && tk.HasPayloads(), terms.HasPayloads);
    ISet<BytesRef> uniqueTerms = new JCG.HashSet<BytesRef>();
    foreach (string term in tk.freqs.Keys)
    {
        uniqueTerms.Add(new BytesRef(term));
    }
    BytesRef[] sortedTerms = uniqueTerms.ToArray(/*new BytesRef[0]*/);
    Array.Sort(sortedTerms, terms.Comparer);
    TermsEnum termsEnum = terms.GetEnumerator(Random.NextBoolean() ? null : this.termsEnum.Value);
    this.termsEnum.Value = termsEnum;
    for (int i = 0; i < sortedTerms.Length; ++i)
    {
        Assert.IsTrue(termsEnum.MoveNext());
        Assert.AreEqual(sortedTerms[i], termsEnum.Term);
        Assert.AreEqual(1, termsEnum.DocFreq);

        FixedBitSet bits = new FixedBitSet(1);
        DocsEnum docsEnum = termsEnum.Docs(bits, Random.NextBoolean() ? null : this.docsEnum.Value);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        bits.Set(0);

        docsEnum = termsEnum.Docs(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsEnum);
        Assert.IsNotNull(docsEnum);
        Assert.AreEqual(0, docsEnum.NextDoc());
        Assert.AreEqual(0, docsEnum.DocID);
        Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], docsEnum.Freq);
        Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsEnum.NextDoc());
        this.docsEnum.Value = docsEnum;

        bits.Clear(0);
        DocsAndPositionsEnum docsAndPositionsEnum = termsEnum.DocsAndPositions(bits, Random.NextBoolean() ? null : this.docsAndPositionsEnum.Value);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (docsAndPositionsEnum != null)
        {
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        bits.Set(0);

        docsAndPositionsEnum = termsEnum.DocsAndPositions(Random.NextBoolean() ? bits : null, Random.NextBoolean() ? null : docsAndPositionsEnum);
        Assert.AreEqual(ft.StoreTermVectorOffsets || ft.StoreTermVectorPositions, docsAndPositionsEnum != null);
        if (terms.HasPositions || terms.HasOffsets)
        {
            Assert.AreEqual(0, docsAndPositionsEnum.NextDoc());
            int freq = docsAndPositionsEnum.Freq;
            Assert.AreEqual(tk.freqs[termsEnum.Term.Utf8ToString()], freq);
            if (docsAndPositionsEnum != null)
            {
                for (int k = 0; k < freq; ++k)
                {
                    int position = docsAndPositionsEnum.NextPosition();
                    ISet<int> indexes;
                    if (terms.HasPositions)
                    {
                        indexes = tk.positionToTerms[position];
                        Assert.IsNotNull(indexes);
                    }
                    else
                    {
                        indexes = tk.startOffsetToTerms[docsAndPositionsEnum.StartOffset];
                        Assert.IsNotNull(indexes);
                    }
                    if (terms.HasPositions)
                    {
                        bool foundPosition = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.positions[index] == position)
                            {
                                foundPosition = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPosition);
                    }
                    if (terms.HasOffsets)
                    {
                        bool foundOffset = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && tk.startOffsets[index] == docsAndPositionsEnum.StartOffset && tk.endOffsets[index] == docsAndPositionsEnum.EndOffset)
                            {
                                foundOffset = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundOffset);
                    }
                    if (terms.HasPayloads)
                    {
                        bool foundPayload = false;
                        foreach (int index in indexes)
                        {
                            if (tk.termBytes[index].Equals(termsEnum.Term) && Equals(tk.payloads[index], docsAndPositionsEnum.GetPayload()))
                            {
                                foundPayload = true;
                                break;
                            }
                        }
                        Assert.IsTrue(foundPayload);
                    }
                }
                try
                {
                    docsAndPositionsEnum.NextPosition();
                    Assert.Fail();
                }
                catch (Exception e) when (e.IsException())
                {
                    // ok
                }
                catch (Exception e) when (e.IsAssertionError())
                {
                    // ok
                }
            }
            Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
        }
        this.docsAndPositionsEnum.Value = docsAndPositionsEnum;
    }
    Assert.IsFalse(termsEnum.MoveNext());
    for (int i = 0; i < 5; ++i)
    {
        if (Random.NextBoolean())
        {
            Assert.IsTrue(termsEnum.SeekExact(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
        else
        {
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(RandomPicks.RandomFrom(Random, tk.termBytes)));
        }
    }
}