public virtual void TestGetFieldNameVariations()
{
    // FieldOption.ALL: every field name in the index, indexed or not.
    System.Collections.Generic.ICollection<string> result = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    Assert.IsTrue(result != null);
    Assert.IsTrue(result.Count == DocHelper.all.Count);
    foreach (string s in result)
    {
        //System.out.println("Name: " + s);
        // presumably nameValues is keyed by field name; an empty field name is tolerated — TODO confirm
        Assert.IsTrue(DocHelper.nameValues.Contains(s) == true || s.Equals(""));
    }

    // FieldOption.INDEXED: only fields that were indexed.
    result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED);
    Assert.IsTrue(result != null);
    Assert.IsTrue(result.Count == DocHelper.indexed.Count);
    foreach (string s in result)
    {
        Assert.IsTrue(DocHelper.indexed.Contains(s) == true || s.Equals(""));
    }

    // FieldOption.UNINDEXED: stored-only fields.
    result = reader.GetFieldNames(IndexReader.FieldOption.UNINDEXED);
    Assert.IsTrue(result != null);
    Assert.IsTrue(result.Count == DocHelper.unindexed.Count);

    //Get all indexed fields that are storing term vectors
    result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
    Assert.IsTrue(result != null);
    Assert.IsTrue(result.Count == DocHelper.termvector.Count);

    // And the complement: indexed fields without term vectors.
    result = reader.GetFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
    Assert.IsTrue(result != null);
    Assert.IsTrue(result.Count == DocHelper.notermvector.Count);
}
/// <summary> Test field norms.</summary>
/// <param name="fieldNames">names of all fields to read norms for</param>
/// <param name="reader">the segment reader whose norms are checked</param>
/// <returns>a status object recording how many fields were read, or the error encountered</returns>
private Status.FieldNormStatus TestFieldNorms(System.Collections.Generic.ICollection<string> fieldNames, SegmentReader reader)
{
    Status.FieldNormStatus status = new Status.FieldNormStatus();
    try
    {
        // Test Field Norms
        if (infoStream != null)
        {
            infoStream.Write(" test: field norms.........");
        }
        byte[] b = new byte[reader.MaxDoc()];
        foreach (string fieldName in fieldNames)
        {
            // Reading every field's norms into the scratch buffer verifies
            // the norms data is intact; the bytes themselves are not inspected.
            reader.Norms(fieldName, b, 0);
            ++status.totFields;
        }
        Msg("OK [" + status.totFields + " fields]");
    }
    catch (System.Exception e)
    {
        // Deliberately broad catch: an index checker must report corruption,
        // never crash on it. e.Message is already a string, so the original
        // Convert.ToString wrapper was redundant.
        Msg("ERROR [" + e.Message + "]");
        status.error = e;
        if (infoStream != null)
        {
            infoStream.WriteLine(e.StackTrace);
        }
    }
    return status;
}
public virtual void TestPayloadSpanUtil()
{
    // Index a single document so PayloadSpanUtil has something to extract payloads from.
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.SetSimilarity(similarity);
    Document doc = new Document();
    doc.Add(new Field(PayloadHelper.FIELD, "xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory, true);
    IndexReader reader = searcher.IndexReader;
    PayloadSpanUtil psu = new PayloadSpanUtil(reader);

    System.Collections.Generic.ICollection<byte[]> payloads = psu.GetPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
    if (DEBUG)
    {
        System.Console.Out.WriteLine("Num payloads:" + payloads.Count);
    }
    foreach (byte[] bytes in payloads)
    {
        if (DEBUG)
        {
            // Payload bytes are UTF-8 encoded text in this test's analyzer.
            System.Console.Out.WriteLine(new System.String(System.Text.UTF8Encoding.UTF8.GetChars(bytes)));
        }
    }
}
/// <summary>
/// Walks all spans and verifies payload availability, payload count,
/// payload length, and the first payload byte against the expectations.
/// </summary>
private void CheckSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, int expectedPayloadLength, int expectedFirstByte)
{
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    //each position match should have a span associated with it, since there is just one underlying term query, there should
    //only be one entry in the span
    int seen = 0;
    while (spans.Next() == true)
    {
        //if we expect payloads, then isPayloadAvailable should be true
        if (expectedNumPayloads > 0)
        {
            Assert.IsTrue(spans.IsPayloadAvailable() == true, "isPayloadAvailable is not returning the correct value: " + spans.IsPayloadAvailable() + " and it should be: " + (expectedNumPayloads > 0));
        }
        else
        {
            Assert.IsTrue(spans.IsPayloadAvailable() == false, "isPayloadAvailable should be false");
        }
        //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token
        if (spans.IsPayloadAvailable())
        {
            System.Collections.Generic.ICollection<byte[]> payload = spans.GetPayload();
            Assert.IsTrue(payload.Count == expectedNumPayloads, "payload Size: " + payload.Count + " is not: " + expectedNumPayloads);
            foreach (byte[] thePayload in payload)
            {
                Assert.IsTrue(thePayload.Length == expectedPayloadLength, "payload[0] Size: " + thePayload.Length + " is not: " + expectedPayloadLength);
                Assert.IsTrue(thePayload[0] == expectedFirstByte, thePayload[0] + " does not equal: " + expectedFirstByte);
            }
        }
        seen++;
    }
    Assert.IsTrue(seen == expectedNumSpans, seen + " does not equal: " + expectedNumSpans);
}
// Decrements the reference count of every file name in the collection,
// delegating to the single-file DecRef overload.
internal void DecRef(System.Collections.Generic.ICollection<string> files)
{
    foreach (string file in files)
    {
        DecRef(file);
    }
}
// Adds every element of the given collection to this one.
// Mirrors java.util.Collection.addAll; unconditionally reports true,
// exactly as the original implementation did.
public virtual bool AddAll(System.Collections.Generic.ICollection<E> collection)
{
    foreach (E element in collection)
    {
        Add(element);
    }
    return true;
}
/// <summary>
/// Registers each named field in <paramref name="fInfos"/> as indexed, with the
/// given term-vector/payload options. omitNorms is derived per field: a field
/// without norms in the source reader is registered as norms-omitting.
/// </summary>
private void AddIndexed(IndexReader reader, FieldInfos fInfos, System.Collections.Generic.ICollection<string> names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions)
{
    foreach (string field in names)
    {
        fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.HasNorms(field), storePayloads, omitTFAndPositions);
    }
}
// Decrements the reference count of every file name in the collection.
//
// NOTE(review): the original special-cased "files is System.Collections.Hashtable"
// and cast each element to DictionaryEntry. That branch was dead code from the
// Java-to-C# port: a non-generic Hashtable cannot satisfy ICollection<string>,
// and even if a hybrid subclass were passed, casting the generic enumerator's
// string Current to DictionaryEntry would throw. The branch has been removed.
internal void DecRef(System.Collections.Generic.ICollection<string> files)
{
    foreach (string file in files)
    {
        DecRef(file);
    }
}
/// <summary> Assumes the fields are not storing term vectors.
///
/// </summary>
/// <param name="names">The names of the fields
/// </param>
/// <param name="isIndexed">Whether the fields are indexed or not
///
/// </param>
/// <seealso cref="Add(String, boolean)">
/// </seealso>
public void Add(System.Collections.Generic.ICollection<string> names, bool isIndexed)
{
    // lock(this) is kept as-is: callers outside this class may already
    // synchronize on this instance, so the lock target is part of the contract.
    lock (this)
    {
        foreach (string name in names)
        {
            Add(name, isIndexed);
        }
    }
}
/// <summary>Deletes the specified files, but only if they are new
/// (have not yet been incref'd).
/// </summary>
internal void DeleteNewFiles(System.Collections.Generic.ICollection<string> files)
{
    foreach (string fileName in files)
    {
        // A file with a refCounts entry has been incref'd at least once
        // and must not be deleted here.
        if (!refCounts.ContainsKey(fileName))
        {
            DeleteFile(fileName);
        }
    }
}
/// <summary>Deletes the specified files, but only if they are new
/// (have not yet been incref'd).
/// </summary>
internal void DeleteNewFiles(System.Collections.Generic.ICollection<string> files)
{
    foreach (string fileName in files)
    {
        // A file with a refCounts entry has been incref'd at least once
        // and must not be deleted here.
        if (!refCounts.ContainsKey(fileName))
        {
            if (infoStream != null)
            {
                Message("delete new file \"" + fileName + "\"");
            }
            DeleteFile(fileName);
        }
    }
}
public virtual void TestShrinkToAfterShortestMatch3()
{
    // Index one document whose token stream carries per-token payloads.
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new TestPayloadAnalyzer(this), IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.Add(new Field("content", new System.IO.StreamReader(new System.IO.MemoryStream(System.Text.Encoding.ASCII.GetBytes("j k a l f k k p a t a k l k t a")))));
    writer.AddDocument(doc);
    writer.Close();

    IndexSearcher is_Renamed = new IndexSearcher(directory, true);

    SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
    SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
    SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
    SpanNearQuery snq = new SpanNearQuery(sqs, 0, true);
    Spans spans = snq.GetSpans(is_Renamed.IndexReader);

    TopDocs topDocs = is_Renamed.Search(snq, 1);
    // Hashtable used as a set: AddIfNotContains stores each decoded payload once.
    System.Collections.Hashtable payloadSet = new System.Collections.Hashtable();
    for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
    {
        while (spans.Next())
        {
            foreach (byte[] raw in spans.GetPayload())
            {
                CollectionsHelper.AddIfNotContains(payloadSet, new System.String(System.Text.UTF8Encoding.UTF8.GetChars(raw)));
            }
        }
    }
    Assert.AreEqual(2, payloadSet.Count);
    if (DEBUG)
    {
        // Enumerating a Hashtable yields DictionaryEntry values, same as the
        // original explicit-enumerator loop.
        foreach (object entry in payloadSet)
        {
            System.Console.Out.WriteLine("match:" + entry);
        }
    }
    Assert.IsTrue(payloadSet.Contains("a:Noise:10"));
    Assert.IsTrue(payloadSet.Contains("k:Noise:11"));
}
/// <summary> Return a query that will return docs like the passed file.
///
/// </summary>
/// <returns> a query that will return docs like the passed file.
/// </returns>
public Query Like(System.IO.FileInfo f)
{
    if (fieldNames == null)
    {
        // gather list of valid fields from lucene (lazily, on first use)
        System.Collections.Generic.ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
        fieldNames = new System.String[fields.Count];
        int index = 0;
        foreach (string field in fields)
        {
            fieldNames[index++] = field;
        }
    }
    return Like(new System.IO.StreamReader(f.FullName, System.Text.Encoding.Default));
}
/// <summary> Return a query that will return docs like the passed lucene document ID.
///
/// </summary>
/// <param name="docNum">the documentID of the lucene doc to generate the 'More Like This" query for.
/// </param>
/// <returns> a query that will return docs like the passed lucene document ID.
/// </returns>
public Query Like(int docNum)
{
    if (fieldNames == null)
    {
        // gather list of valid fields from lucene (lazily, on first use)
        System.Collections.Generic.ICollection<string> fields = ir.GetFieldNames(IndexReader.FieldOption.INDEXED);
        fieldNames = new System.String[fields.Count];
        int index = 0;
        foreach (string field in fields)
        {
            fieldNames[index++] = field;
        }
    }
    return CreateQuery(RetrieveTerms(docNum));
}
/// <summary>
/// Folds all merged segment files into a single compound (.cfs) file
/// named <paramref name="fileName"/>, and returns the names of the
/// source files that were packed (so the caller can delete them).
/// </summary>
public /*internal*/ System.Collections.Generic.ICollection<string> CreateCompoundFile(System.String fileName)
{
    System.Collections.Generic.ICollection<string> files = GetMergedFiles();
    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);

    // Now merge all added files
    foreach (string file in files)
    {
        cfsWriter.AddFile(file);
    }

    // Perform the merge
    cfsWriter.Close();

    return files;
}
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
///
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number </throws>
/// <summary> of documents
/// </summary>
/// <throws> IllegalArgumentException if not all indexes have the same value </throws>
/// <summary> of {@link IndexReader#MaxDoc()}
/// </summary>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
    EnsureOpen();
    if (readers.Count == 0)
    {
        // The first reader establishes the shape all later readers must match.
        this.maxDoc = reader.MaxDoc();
        this.numDocs = reader.NumDocs();
        this.hasDeletions = reader.HasDeletions();
    }

    if (reader.MaxDoc() != maxDoc)
    {
        // check compatibility
        throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc());
    }
    if (reader.NumDocs() != numDocs)
    {
        throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
    }

    System.Collections.Generic.ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
    readerToFields[reader] = fields;
    foreach (string field in fields)
    {
        // update fieldToReader map: the first reader to declare a field wins.
        // NOTE(review): the null test assumes fieldToReader's indexer returns
        // null for a missing key (non-generic map semantics) — confirm against
        // the field's declaration.
        if (fieldToReader[field] == null)
        {
            fieldToReader[field] = reader;
        }
    }

    if (!ignoreStoredFields)
    {
        storedFieldReaders.Add(reader); // add to storedFieldReaders
    }
    readers.Add(reader);

    if (incRefReaders)
    {
        reader.IncRef();
    }
    decrefOnClose.Add(incRefReaders);
}
/// <summary>
/// "Backs up" every file referenced by the given commit point. While we hold
/// the snapshot, and no matter how long the backup takes, the IndexWriter
/// will never delete the files in the snapshot.
/// </summary>
private void CopyFiles(Directory dir, IndexCommit cp)
{
    System.Collections.Generic.ICollection<string> files = cp.GetFileNames();
    foreach (string fileName in files)
    {
        // NOTE: in a real backup you would not use
        // readFile; you would need to use something else
        // that copies the file to a backup location. This
        // could even be a spawned shell process (eg "tar",
        // "zip") that takes the list of files and builds a
        // backup.
        ReadFile(dir, fileName);
    }
}
/// <summary>
/// Walks all spans, asserting the payload count of the i-th span matches
/// numPayloads[i], and that the total number of spans equals numSpans.
/// </summary>
private void CheckSpans(Spans spans, int numSpans, int[] numPayloads)
{
    int cnt = 0;
    while (spans.Next() == true)
    {
        if (DEBUG)
        {
            System.Console.Out.WriteLine("\nSpans Dump --");
        }
        if (spans.IsPayloadAvailable())
        {
            System.Collections.Generic.ICollection<byte[]> payload = spans.GetPayload();
            if (DEBUG)
            {
                System.Console.Out.WriteLine("payloads for span:" + payload.Count);
            }
            foreach (byte[] bytes in payload)
            {
                if (DEBUG)
                {
                    System.Console.Out.WriteLine("doc:" + spans.Doc() + " s:" + spans.Start() + " e:" + spans.End() + " " + new System.String(System.Text.UTF8Encoding.UTF8.GetChars(bytes)));
                }
            }
            Assert.AreEqual(numPayloads[cnt], payload.Count);
        }
        else
        {
            // No payload on this span: only legal if none was expected.
            Assert.IsFalse(numPayloads.Length > 0 && numPayloads[cnt] > 0, "Expected spans:" + numPayloads[cnt] + " found: 0");
        }
        cnt++;
    }
    Assert.AreEqual(numSpans, cnt);
}
/// <summary>
/// Merges two segments into a new segment named <paramref name="merged"/>,
/// optionally packing the result into a compound file (deleting the loose
/// per-segment files afterwards). Returns the resulting SegmentInfo.
/// </summary>
private SegmentInfo Merge(SegmentInfo si1, SegmentInfo si2, System.String merged, bool useCompoundFile)
{
    SegmentReader r1 = SegmentReader.Get(true, si1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
    SegmentReader r2 = SegmentReader.Get(true, si2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);

    SegmentMerger merger = new SegmentMerger(si1.dir, merged);
    merger.Add(r1);
    merger.Add(r2);
    merger.Merge();
    merger.CloseReaders();

    if (useCompoundFile)
    {
        // The loose files have been folded into the .cfs; remove them.
        System.Collections.Generic.ICollection<string> filesToDelete = merger.CreateCompoundFile(merged + ".cfs");
        foreach (string file in filesToDelete)
        {
            si1.dir.DeleteFile(file);
        }
    }

    return new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, useCompoundFile, true);
}
// Adapts the wrapped collection's .NET enumerator to the Java-style
// IIterator contract.
public IIterator<T> Iterator()
{
    var enumerator = collection.GetEnumerator();
    return new EnumeratorWrapper<T>(enumerator);
}
public virtual void TestPayloadsPos0()
{
    // Runs the whole scenario twice: x == 0 is the normal-positions pass,
    // x == 1 enables allowMinus1Position, which changes the first position
    // reported for "a" and skips the span checks (via continue).
    for (int x = 0; x < 2; x++)
    {
        Directory dir = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
        if (x == 1)
        {
            writer.SetAllowMinus1Position();
        }
        Document doc = new Document();
        System.IO.MemoryStream ms = new System.IO.MemoryStream();
        System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
        sw.Write("a a b c d e a f g h i j a b k k");
        // flush to stream & reset it's position so it can be read
        sw.Flush();
        ms.Position = 0;
        doc.Add(new Field("content", new System.IO.StreamReader(ms)));
        writer.AddDocument(doc);
        IndexReader r = writer.GetReader();
        TermPositions tp = r.TermPositions(new Term("content", "a"));
        int count = 0;
        Assert.IsTrue(tp.Next());
        // "a" occurs 4 times
        Assert.AreEqual(4, tp.Freq());
        int expected;
        if (x == 1)
        {
            // NOTE(review): with allowMinus1Position the first position is
            // presumably stored as -1 and surfaces here as Int32.MaxValue —
            // confirm against TermPositions.NextPosition semantics.
            expected = System.Int32.MaxValue;
        }
        else
        {
            expected = 0;
        }
        Assert.AreEqual(expected, tp.NextPosition());
        if (x == 1)
        {
            continue;
        }
        Assert.AreEqual(1, tp.NextPosition());
        Assert.AreEqual(3, tp.NextPosition());
        Assert.AreEqual(6, tp.NextPosition());
        // only one doc has "a"
        Assert.IsFalse(tp.Next());
        IndexSearcher is_Renamed = new IndexSearcher(r);
        SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
        SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
        SpanQuery[] sqs = new SpanQuery[] { stq1, stq2 };
        SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);
        count = 0;
        bool sawZero = false;
        //System.out.println("\ngetPayloadSpans test");
        // First pass: payload-aware spans — counts payloads and checks that
        // at least one span starts at position 0.
        Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.GetIndexReader());
        while (pspans.Next())
        {
            //System.out.println(pspans.doc() + " - " + pspans.start() + " - "+ pspans.end());
            System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
            sawZero |= pspans.Start() == 0;
            for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
            {
                count++;
                // Payload value is deliberately ignored; only the count matters here.
                System.Object generatedAux2 = it.Current;
                //System.out.println(new String((byte[]) it.next()));
            }
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);
        //System.out.println("\ngetSpans test");
        // Second pass: plain spans — counts span matches only.
        Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.GetIndexReader());
        count = 0;
        sawZero = false;
        while (spans.Next())
        {
            count++;
            sawZero |= spans.Start() == 0;
            //System.out.println(spans.doc() + " - " + spans.start() + " - " + spans.end());
        }
        Assert.AreEqual(4, count);
        Assert.IsTrue(sawZero);
        //System.out.println("\nPayloadSpanUtil test");
        // Third pass: PayloadSpanUtil — decodes payload bytes and looks for "pos: 0".
        sawZero = false;
        PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.GetIndexReader());
        System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
        count = pls.Count;
        for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();)
        {
            System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[])it.Current));
            //System.out.println(s);
            sawZero |= s.Equals("pos: 0");
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);
        writer.Close();
        is_Renamed.GetIndexReader().Close();
        dir.Close();
    }
}